---
sidebar: false
title: "Building a Scalable ML Workflow with Velda"
description: "Learn how to build robust, scalable machine learning workflows using Velda's vrun and vbatch commands. From simple pipelines to complex fan-out patterns for parallel processing."
date: 2025-09-25
author: Chuan Qiu
tags: [machine-learning, ml-workflow, data-processing, ai-pipeline, cloud-computing, vrun, vbatch, parallel-processing, model-training, data-science, mlops, workflow-automation]
keywords: ["machine learning workflow", "ML pipeline", "vrun command", "vbatch tutorial", "parallel data processing", "model training pipeline", "cloud ML workflows", "scalable ML", "AI development", "data processing automation", "MLOps pipeline", "fan-out pattern"]
image: "https://cdn-images-1.medium.com/max/2400/1*2Ej2vw32-janKdPbfp1gKg.png"
excerpt: "Build sophisticated ML workflows with Velda's simple commands. Learn to create pipelines with dependencies, parallel processing, and fan-out patterns for scalable machine learning."
readingTime: "5 min"
category: "Technical Tutorial"
---
# **Building a Scalable ML Workflow with Velda**

In the world of machine learning, building a robust and scalable ML workflow is as crucial as the model itself. A well-structured pipeline not only ensures reproducibility but also significantly speeds up experimentation and deployment cycles. Today, we'll explore how to build such a pipeline using Velda's command-line tools, which simplify cloud resource management.

## **Getting Started: `vrun`, `vbatch`, and `vbatch -f`**

`vrun` is your gateway to running commands on the cloud without the headache of managing the underlying infrastructure. [It's as simple as prefixing your command with vrun](https://velda.io/blog/vrun-is-all-you-need), and everything else, like packaging, is handled automatically. For instance, to run a Python script on an 8-CPU instance, you would use:

```
vrun -P cpu-8 python my_script.py
```

For long-running tasks, you can use `vbatch`, which runs the command in the background, allowing you to move on to other tasks.

```
JOB_ID=$(vbatch -P cpu-8 python my_long_running_script.py)
```

The `vbatch` command prints out a task ID that you can reference later. It also provides a URL for tracking progress. To view the task's logs in your terminal, use the `velda task log` command:

```
velda task log ${JOB_ID}
```

You can also combine these approaches with `vbatch -f`: since most errors occur during the initial phase of a job, the `-f` option waits until the task starts and streams its logs. You can interrupt with `Ctrl-C` at any time while keeping the task running in the background.

```
vbatch -f -P cpu-8 python my_long_running_script.py
```

<img src="https://cdn-images-1.medium.com/max/1600/1*SAFtoS16dTi2npTcV1Ofgw.png" alt="Screenshot of task logs" />
<center><small>Web page for task details and logs</small></center>

## **Simple Machine Learning Workflow: Process, Train, Evaluate**

Let's build a simple ML workflow with three stages: data processing, model training, and evaluation. We can chain these tasks together using the `--after-success` flag, which ensures a task starts only after its dependencies have successfully completed. Assuming you already have scripts for each step, creating the pipeline in Velda is as simple as running these commands:

```
# Process the data, e.g. data cleaning
vbatch -P cpu-16 --name process python process_data.py

# Train the model after processing is done
vbatch -P gpu-1 --name train --after-success process python train_model.py

# Model evaluation after training is complete
vbatch -P cpu-8 --name eval --after-success train python evaluate_model.py
```

This creates a linear pipeline where each step runs only after the previous one succeeds.
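
To follow the whole pipeline from your terminal, you can launch the final step with `-f`: since `eval` starts only after `train` succeeds, streaming its logs effectively waits for the entire chain.

```
# Launch the eval step with -f to stream its logs once it starts;
# Ctrl-C detaches without cancelling the task
vbatch -f -P cpu-8 --name eval --after-success train python evaluate_model.py
```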

<img src="https://cdn-images-1.medium.com/max/1600/1*UFUi0CM4fZaN1ey4MnZfmQ.png" alt="Task list view" />
<center><small>Web page for task list view</small></center>

## **Batch Processing: Processing Data in Parallel**

In many real-world scenarios, you'll need to process a large number of data files. This is where the "fan-out" pattern comes in handy. We can use standard bash commands like `xargs` or a `for` loop to process all files from a source in parallel.

For example, to process all .csv files in a directory:

```
vbatch bash -c "ls *.csv | xargs -I {} vbatch --name {} -P cpu-8 python process_file.py {}"
```

This command launches a separate task for each .csv file, processing them in parallel. Because everything is wrapped under one top-level `vbatch` command, the per-file tasks are grouped under a single parent task for better organization and searchability.
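
If you prefer a `for` loop over `xargs`, an equivalent version looks like this:

```
# Same fan-out with a bash for loop: one vbatch task per CSV file
vbatch bash -c 'for f in *.csv; do vbatch --name "$f" -P cpu-8 python process_file.py "$f"; done'
```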

Keep in mind that there's always some overhead for starting a task (about 1 second), so we recommend that each task runs for at least one minute. If needed, you can chunk the inputs:

```
vbatch bash -c "ls *.csv | xargs -L 100 vbatch -P cpu-8 python process_file.py"
```

This automatically groups up to 100 files into each task, reducing scheduling overhead. Note that `xargs -L 100` passes the filenames as multiple arguments to a single invocation, so `process_file.py` must accept more than one file.
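
If your processing script only handles one file at a time, a thin wrapper can loop over each chunk. Here's a minimal sketch, where `process_chunk.sh` and the single-file `process_one.py` are hypothetical names:

```
#!/bin/bash
# process_chunk.sh: run a single-file script once for each file
# in the chunk that xargs passes as arguments
for f in "$@"; do
    python process_one.py "$f"
done
```

You would then launch it with `ls *.csv | xargs -L 100 vbatch -P cpu-8 ./process_chunk.sh`.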

## **Embedding the Fan-Out in an ML Pipeline**

Now, let's embed this fan-out step into a larger ML workflow. We can create a bash script that contains the fan-out logic and then execute that script as a step in our pipeline.

`process_all.sh`

```
#!/bin/bash
# Launch one vrun task per CSV file. xargs -P 0 runs the blocking vrun
# calls in parallel, so the script exits only after every file is processed.
ls *.csv | xargs -P 0 -I {} vrun -P cpu-8 python process_file.py {}
```

Now, we can incorporate this into our main pipeline. Because `process_all.sh` uses blocking `vrun` calls, the `preprocess` step completes only after every file task has finished:

```
# Process all data files in parallel
vbatch -P cpu-16 --name preprocess ./process_all.sh

# Train the model after all files are processed
vbatch -P gpu-1 --name train --after-success preprocess python train_model.py

# Evaluate the model
vbatch -P cpu-8 --name eval --after-success train python evaluate_model.py
```

The pipeline above starts `train` only after all pre-processing tasks have completed successfully. With that, you can process thousands of data sets without any complex orchestration tools.

Looking for more granular data processing, like Ray/DataFlow? No problem: a future tutorial will show how to use Velda to scale up your data processing pipelines.

## **More Complex Pipelines: Recursive Fan-Outs**

For more complex scenarios, you can define recursive pipelines within the fan-out pattern, or any hierarchy that you need. For example, for each data point, you might want to run inference, evaluate the result, and then aggregate the evaluations. This can be achieved by defining a "sub-pipeline" for each data point.

Let's say we have a script `inference_and_eval.sh` that takes a data file as input and performs both inference and evaluation:

`inference_and_eval.sh`

```
#!/bin/bash
DATA_FILE=$1

# Run inference
vrun -P gpu-1 --name inference ./run_inference.py "$DATA_FILE"
# Evaluate the inference result once inference succeeds
vrun -P cpu-4 --after-success inference ./evaluate_inference.py "$DATA_FILE"
```

Now, we can use this script in our fan-out:

```
vbatch bash -c "ls *.csv | xargs -I {} vbatch --name {} ./inference_and_eval.sh {}"
```

This creates a two-level pipeline for each data file. The power of this approach is that you can build arbitrarily complex and recursive ML workflows that remain easy to manage and scale.
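
To add the aggregation step mentioned earlier, you can gate it on the entire fan-out. Here's a sketch, assuming a hypothetical `aggregate_evals.py`; it uses blocking `vrun` calls inside the fan-out (as in `process_all.sh`) so that the parent task finishes only when every sub-pipeline has:

```
# Fan out one sub-pipeline per CSV file; xargs -P 0 runs them in
# parallel, and the blocking vrun calls keep the parent task running
# until every sub-pipeline completes
vbatch --name fanout bash -c "ls *.csv | xargs -P 0 -I {} vrun ./inference_and_eval.sh {}"

# Aggregate the per-file evaluations once the whole fan-out succeeds
vbatch -P cpu-4 --after-success fanout python aggregate_evals.py
```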

By leveraging Velda's `vrun` and `vbatch` commands along with bash scripting, you can build sophisticated, scalable, and reproducible ML workflows with ease. This allows you to focus on what matters most: building great models.

## **Getting Started Today**

Ready to build your first pipeline? A few options to get started:

1. [**Open Source**](https://github.com/velda-io/velda): Try Velda's open-source edition
2. [**Enterprise**](https://velda.io/book): Deploy with SSO, RBAC, and advanced observability
3. **Hosted** (coming soon): Immediately scale with Velda's managed platform