
Commit 10fd58f

Add blog post for building ml workflow (#3)

1 parent a313ead commit 10fd58f

File tree: 7 files changed (+195, -20 lines)

.vitepress/config.mts

Lines changed: 5 additions & 6 deletions
@@ -1,5 +1,6 @@
 import { defineConfig } from 'vitepress'
 import { withMermaid } from "vitepress-plugin-mermaid";
+import { blogPosts } from './data/blogPosts';

 export default withMermaid(defineConfig({
   title: "Velda",
@@ -28,12 +29,10 @@ export default withMermaid(defineConfig({
     '/blog/': [
       {
         text: 'Blog',
-        items: [
-          { text: 'Latest Posts', link: '/blog/' },
-          { text: "vrun is all you need: Revolutionizing Development with One Command", link: "/blog/vrun-is-all-you-need" },
-          { text: "Why AI/ML Researchers Are Stuck with Inefficient GPU Setups (And How to Fix It)", link: "/blog/why-stuck-inefficient-gpu-setup" },
-          { text: 'Introducing Velda', link: '/blog/introducing-velda' }
-        ]
+        items: [{text: "Latest Posts", link: '/blog/'}, ...blogPosts.map(post => ({
+          text: post.title,
+          link: `/blog/${post.slug}`
+        }))]
       }
     ],
     '/': [

.vitepress/data/blogPosts.ts

Lines changed: 28 additions & 3 deletions
@@ -13,9 +13,34 @@ export interface BlogPost {

 export const blogPosts: BlogPost[] = [
   {
-    "title": "vrun is All You Need: Revolutionizing AI/ML Development with One Command",
+    "title": "Building a Scalable ML Workflow with Velda",
+    "slug": "build-machine-learning-workflow",
+    "description": "Learn how to build robust, scalable machine learning workflows using Velda's vrun and vbatch commands. From simple pipelines to complex fan-out patterns for parallel processing.",
+    "excerpt": "Build sophisticated ML workflows with Velda's simple commands. Learn to create pipelines with dependencies, parallel processing, and fan-out patterns for scalable machine learning.",
+    "date": "2025-09-25",
+    "author": "Chuan Qiu",
+    "readingTime": "5 min",
+    "category": "Technical Tutorial",
+    "image": "https://cdn-images-1.medium.com/max/2400/1*2Ej2vw32-janKdPbfp1gKg.png",
+    "tags": [
+      "machine-learning",
+      "ml-workflow",
+      "data-processing",
+      "ai-pipeline",
+      "cloud-computing",
+      "vrun",
+      "vbatch",
+      "parallel-processing",
+      "model-training",
+      "data-science",
+      "mlops",
+      "workflow-automation"
+    ]
+  },
+  {
+    "title": "vrun is All You Need: Revolutionizing Development with One Command",
     "slug": "vrun-is-all-you-need",
-    "description": "Discover how Velda's vrun command transforms AI/ML development by providing instant, scalable cloud compute that feels like local execution. Eliminate inefficient GPU setups and complex orchestration with this game-changing tool.",
+    "description": "Discover how Velda's vrun command transforms development by providing instant, scalable cloud compute that feels like local execution. Eliminate inefficient GPU setups and complex orchestration with this game-changing tool.",
     "excerpt": "Why vrun is the ultimate solution for AI/ML researchers struggling with inefficient GPU setups. Learn how one command can provide instant scaling, cost efficiency, and seamless development experience.",
     "date": "2025-09-15",
     "author": "Chuan Qiu",
@@ -78,4 +103,4 @@ export const blogPosts: BlogPost[] = [
     "machine-learning"
   ]
 }
-];
+];

.vitepress/theme/ComparisonLayout.vue

Lines changed: 8 additions & 8 deletions
@@ -35,26 +35,26 @@
   margin-right: auto;
 }

-.container.vp-doc ::v-deep h1 {
+.container.vp-doc :deep(h1) {
   text-align: center;
 }
-.comparison-table ::v-deep > table {
+.comparison-table :deep(table) {
   width: 100%;
   table-layout: fixed;
   display: table;
 }
-.comparison-table ::v-deep > table > thead > tr > th:nth-child(1),
-.comparison-table ::v-deep > table > tbody > tr > td:nth-child(1) {
+.comparison-table :deep(table > thead > tr > th:nth-child(1)),
+.comparison-table :deep(table > tbody > tr > td:nth-child(1)) {
   width: 50%;
 }

-.comparison-table ::v-deep > table > thead > tr > th:nth-child(2),
-.comparison-table ::v-deep > table > tbody > tr > td:nth-child(2) {
+.comparison-table :deep(table > thead > tr > th:nth-child(2)),
+.comparison-table :deep(table > tbody > tr > td:nth-child(2)) {
   width: 25%;
 }

-.comparison-table ::v-deep > table > thead > tr > th:nth-child(3),
-.comparison-table ::v-deep > table > tbody > tr > td:nth-child(3) {
+.comparison-table :deep(table > thead > tr > th:nth-child(3)),
+.comparison-table :deep(table > tbody > tr > td:nth-child(3)) {
   width: 25%;
 }
blog/build-machine-learning-workflow.md (new file)

Lines changed: 150 additions & 0 deletions
---
sidebar: false
title: "Building a Scalable ML Workflow with Velda"
description: "Learn how to build robust, scalable machine learning workflows using Velda's vrun and vbatch commands. From simple pipelines to complex fan-out patterns for parallel processing."
date: 2025-09-25
author: Chuan Qiu
tags: [machine-learning, ml-workflow, data-processing, ai-pipeline, cloud-computing, vrun, vbatch, parallel-processing, model-training, data-science, mlops, workflow-automation]
keywords: ["machine learning workflow", "ML pipeline", "vrun command", "vbatch tutorial", "parallel data processing", "model training pipeline", "cloud ML workflows", "scalable ML", "AI development", "data processing automation", "MLOps pipeline", "fan-out pattern"]
image: "https://cdn-images-1.medium.com/max/2400/1*2Ej2vw32-janKdPbfp1gKg.png"
excerpt: "Build sophisticated ML workflows with Velda's simple commands. Learn to create pipelines with dependencies, parallel processing, and fan-out patterns for scalable machine learning."
readingTime: "5 min"
category: "Technical Tutorial"
---

# **Building a Scalable ML Workflow with Velda**

In the world of machine learning, a robust and scalable workflow is as crucial as the model itself. A well-structured pipeline not only ensures reproducibility but also significantly speeds up experimentation and deployment cycles. Today, we'll explore how to build such a pipeline using Velda's command-line tools, which simplify cloud resource management.
## **Getting Started: `vrun`, `vbatch`, and `vbatch -f`**

`vrun` is your gateway to running commands on the cloud without the headache of managing the underlying infrastructure. [It's as simple as prefixing your command with vrun](https://velda.io/blog/vrun-is-all-you-need); everything else, like packaging, is handled automatically. For instance, to run a Python script on an 8-CPU instance, you would use:

```
vrun -P cpu-8 python my_script.py
```

For long-running tasks, use `vbatch`, which runs the command in the background, allowing you to move on to other work:

```
JOB_ID=$(vbatch -P cpu-8 python my_long_running_script.py)
```

The `vbatch` command prints a task ID that you can reference later, along with a URL for tracking progress. To view the task's logs in your terminal, use the `velda task log` command:

```
velda task log ${JOB_ID}
```

You can also combine these approaches with `vbatch -f`: since most errors occur during the initial phase of a job, the `-f` option waits until the task starts and then streams its logs. You can interrupt with `Ctrl-C` at any time while keeping the task running in the background.

```
vbatch -f -P cpu-8 python my_long_running_script.py
```
<img src="https://cdn-images-1.medium.com/max/1600/1*SAFtoS16dTi2npTcV1Ofgw.png" alt="Screenshot of task logs" />
<center><small>Web page for task details and logs</small></center>
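In practice, a session might look like this (a sketch; `<task-id>` stands in for the ID printed at submission):

```
vbatch -f -P cpu-8 python my_long_running_script.py
# Watch the early logs; press Ctrl-C once the job looks healthy.
# The task keeps running in the background. Re-attach any time with:
velda task log <task-id>
```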
## **Simple Machine Learning Workflow: Process, Train, Evaluate**

Let's build a simple ML workflow with three stages: data processing, model training, and evaluation. We can chain these tasks together using the `--after-success` flag, which ensures a task starts only after its dependencies have completed successfully. Assuming you already have scripts for each step, creating the pipeline in Velda is as simple as running these commands:

```
# Process the data, e.g. data cleaning
vbatch -P cpu-16 --name process python process_data.py

# Train the model after processing is done
vbatch -P gpu-1 --name train --after-success process python train_model.py

# Model evaluation after training is complete
vbatch -P cpu-8 --name eval --after-success train python evaluate_model.py
```

This creates a linear pipeline where each step runs only after the previous one succeeds.

<img src="https://cdn-images-1.medium.com/max/1600/1*UFUi0CM4fZaN1ey4MnZfmQ.png" alt="Task list view" />
<center><small>Web page for task list view</small></center>
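If you rerun this workflow often, the three submissions can live in one script. Here's a minimal sketch, assuming the pools above (`cpu-16`, `gpu-1`, `cpu-8`) exist in your cluster and that `vbatch` prints the task ID as shown earlier; the script name is just a suggestion:

```
#!/bin/bash
# run_pipeline.sh - submit the whole process/train/eval chain at once.
set -euo pipefail

vbatch -P cpu-16 --name process python process_data.py
vbatch -P gpu-1 --name train --after-success process python train_model.py
EVAL_ID=$(vbatch -P cpu-8 --name eval --after-success train python evaluate_model.py)

# Follow the final stage from the terminal.
velda task log "${EVAL_ID}"
```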
## **Batch Processing: Processing Data in Parallel**

In many real-world scenarios, you'll need to process a large number of data files. This is where the "fan-out" pattern comes in handy. We can use standard bash tools like `xargs` or a `for` loop to process all files from a source in parallel.

For example, to process all .csv files in a directory:

```
vbatch bash -c "ls *.csv | xargs -I {} vbatch --name {} -P cpu-8 python process_file.py {}"
```

This command launches a separate task for each .csv file, processing them in parallel. Because everything is wrapped in one top-level `vbatch` command, the per-file tasks are grouped under a single parent task for better organization and searchability.
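Since the fan-out is itself just another task, you can capture its ID and follow it like any other job; a small sketch reusing the `velda task log` pattern from above:

```
# Submit the fan-out and keep its task ID for later inspection.
FANOUT_ID=$(vbatch bash -c "ls *.csv | xargs -I {} vbatch --name {} -P cpu-8 python process_file.py {}")
velda task log "${FANOUT_ID}"
```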
Keep in mind that there's always some overhead for starting a task (about one second), so we recommend that each task run for at least one minute. If needed, you can chunk the inputs:

```
vbatch bash -c "ls *.csv | xargs -L 100 vbatch -P cpu-8 python process_file.py"
```

This groups up to 100 files into each task, reducing scheduling overhead.
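One caveat: with `-L 100`, `xargs` appends up to 100 file names to a single invocation, so `process_file.py` must accept multiple paths. If it only handles one, a thin wrapper can bridge the gap (the name `process_many.sh` is just a suggestion):

```
#!/bin/bash
# process_many.sh - process every path that xargs hands us, one at a time.
set -euo pipefail
for f in "$@"; do
  python process_file.py "$f"
done
```

The chunked fan-out then becomes `ls *.csv | xargs -L 100 vbatch -P cpu-8 ./process_many.sh`.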
## **Embedding the Fan-Out in an ML Pipeline**

Now, let's embed this fan-out step into a larger ML workflow. We can put the fan-out logic in a bash script and execute that script as a step in our pipeline. Note the use of the blocking `vrun` here (with `xargs -P` for concurrency) rather than `vbatch`, so the script finishes only when every file has been processed:

`process_all.sh`

```
#!/bin/bash
# -P 8 lets xargs run up to 8 vrun calls at a time; each vrun blocks until
# its task completes, so the script exits only after all files are done.
ls *.csv | xargs -P 8 -I {} vrun -P cpu-8 python process_file.py {}
```

Now, we can incorporate this into our main pipeline:

```
# Process all data files in parallel
vbatch -P cpu-16 --name preprocess ./process_all.sh

# Train the model after all files are processed
vbatch -P gpu-1 --name train --after-success preprocess python train_model.py

# Evaluate the model
vbatch -P cpu-8 --name eval --after-success train python evaluate_model.py
```

The pipeline above starts `train` only after all pre-processing tasks have completed. With that, you can process thousands of datasets without any complex orchestration tools.

Looking for more granular data processing like Ray/DataFlow? No problem: a future tutorial will show how to use Velda to scale up your data-processing pipelines.
## **More Complex Pipelines: Recursive Fan-Outs**

For more complex scenarios, you can define recursive pipelines within the fan-out pattern, or any hierarchy you need. For example, for each data point you might want to run inference, evaluate the result, and then aggregate the evaluations. This can be achieved by defining a "sub-pipeline" for each data point.

Let's say we have a script `inference_and_eval.sh` that takes a data file as input and performs both inference and evaluation:

`inference_and_eval.sh`

```
#!/bin/bash
DATA_FILE=$1

# Run inference
vrun -P gpu-1 --name inference ./run_inference.py "$DATA_FILE"
# Evaluate the inference result
vrun -P cpu-4 --after-success inference ./evaluate_inference.py "$DATA_FILE"
```

Now, we can use this script in our fan-out:

```
vbatch bash -c "ls *.csv | xargs -I {} vbatch --name {} ./inference_and_eval.sh {}"
```

This creates a two-level pipeline for each data file. The power of this approach is that you can build arbitrarily complex, recursive ML workflows that remain easy to manage and scale.
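To add the aggregation step mentioned above, one option is to reuse the blocking pattern from `process_all.sh`: fan out with `vrun` inside a named parent task, then chain an aggregation job onto it. A sketch, where `aggregate_results.py` and the `-P 8` concurrency level are placeholders:

```
# The parent finishes only when every sub-pipeline finishes, because vrun blocks.
vbatch --name fanout bash -c "ls *.csv | xargs -P 8 -I {} vrun ./inference_and_eval.sh {}"

# Aggregate the per-file evaluations once the whole fan-out has succeeded.
vbatch -P cpu-4 --after-success fanout python aggregate_results.py
```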
By leveraging Velda's `vrun` and `vbatch` commands along with bash scripting, you can build sophisticated, scalable, and reproducible ML workflows with ease. This lets you focus on what matters most: building great models.

## **Getting Started Today**

Ready to build your first pipeline? A few options to get started:

1. [**Open Source**](https://github.com/velda-io/velda): Try Velda's open-source edition
2. [**Enterprise**](https://velda.io/book): Deploy with SSO, RBAC, and advanced observability
3. **Hosted (coming soon)**: Scale instantly with Velda's managed platform

blog/vrun-is-all-you-need.md

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ category: "Technical Blog"

 ## **The Cloud Development Dilemma**

-Picture this: You're a developer working on a machine learning project. Your local laptop struggles with training models, so you spin up a cloud VM. But now you're spending hours setting up the new environment, and paying for compute power even when you're in meetings or writing documentation. When you actually need serious computational power like multi-node ai training, you're stuck dealing with Kubernetes manifests that feel like learning a foreign language.
+Picture this: You're a developer working on a machine learning project. Your local laptop struggles with training models, so you spin up a cloud VM. But now you're spending hours setting up the new environment, and paying for compute power even when you're in meetings or writing documentation. When you actually need serious computational power like multi-node AI training, you're stuck dealing with Kubernetes manifests that feel like learning a foreign language.

 Sound familiar? You're not alone. Traditional cloud development tools force developers to choose between expensive always-on instances or complex orchestration platforms that require DevOps expertise just to run a simple training job.
sitemap.md

Lines changed: 1 addition & 0 deletions
@@ -23,6 +23,7 @@ description: Complete site navigation for Velda - find all pages, blog posts, an

 ## 📰 Blog
 - [Blog Home](/blog/) - Latest posts and updates
+- [Building a Scalable ML Workflow with Velda](/blog/build-machine-learning-workflow) - *September 24, 2025*
 - [vrun is All You Need: Revolutionizing Development with One Command](/blog/vrun-is-all-you-need) - *September 14, 2025*
 - [Why AI/ML Researchers Are Stuck with Inefficient GPU Setups (And How to Fix It)](/blog/why-stuck-inefficient-gpu-setup) - *September 7, 2025*
 - [Velda Blog - Cloud Development Insights & Updates](/blog/) - *September 7, 2025*

slurm-alternative.md

Lines changed: 2 additions & 2 deletions
@@ -27,8 +27,8 @@ image: "https://velda.io/og-preview.png"

 | Feature | Slurm | Velda |
 |--------------------|--------------------|--------------------|
-| **Containerized Execution**<br>Each task runs in a container that is isolated with other workloads on the same node, and developers cannot access tasks of other users|||
-| **Environment customization**<br>Every workload can run in fully customizable environment, including system packages(apt, pip, etc.) |||
+| **Containerized Execution**<br>Each task runs in a container that is isolated from other workloads on the same node, and workloads cannot access tasks of other users|||
+| **Environment customization**<br>Every workload can run in a fully customizable environment, including some system packages (apt, pip, etc.) |||
 | **Email**<br>Get notified when your job is completed |||
 | **Cluster autoscale**<br>Allocate compute resources from Cloud / Kubernetes based on demand |||
 | **Interactive Development**<br>Developers have access to dedicated dev-environments, with full capability like IDEs and docker access |||
