diff --git a/CHANGELOG.md b/CHANGELOG.md index 7667e3e..e7b673d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,7 +87,7 @@ - add ut for ascend by ([@shijinye](https://github.com/shijinye)) in [#664](https://github.com/Project-HAMi/HAMi/pull/664) - optimization map init in test by ([@lengrongfu](https://github.com/lengrongfu)) in [#678](https://github.com/Project-HAMi/HAMi/pull/678) - Optimize monitor by ([@for800000](https://github.com/for800000)) in [#683](https://github.com/Project-HAMi/HAMi/pull/683) -- fix code lint faild by ([@lengrongfu](https://github.com/lengrongfu)) in [#685](https://github.com/Project-HAMi/HAMi/pull/685) +- fix code lint failed by ([@lengrongfu](https://github.com/lengrongfu)) in [#685](https://github.com/Project-HAMi/HAMi/pull/685) - fix(helm): Add NODE_NAME env var to the vgpu-monitor container from spec.nodeName by ([@Nimbus318](https://github.com/Nimbus318)) in [#687](https://github.com/Project-HAMi/HAMi/pull/687) - fix vGPUmonitor deviceidx is always 0 by ([@lengrongfu](https://github.com/lengrongfu)) in [#684](https://github.com/Project-HAMi/HAMi/pull/684) - add ut for pkg/scheduler/event.go by ([@Penguin-zlh](https://github.com/Penguin-zlh)) in [#688](https://github.com/Project-HAMi/HAMi/pull/688) diff --git a/blog/2024-12-31-post/index.md b/blog/2024-12-31-post/index.md index 774f894..1192904 100644 --- a/blog/2024-12-31-post/index.md +++ b/blog/2024-12-31-post/index.md @@ -1600,7 +1600,7 @@ type DevicePluginServer interface { // Plugin can run device specific operations and instruct Kubelet // of the steps to make the Device available in the container Allocate(context.Context, *AllocateRequest) (*AllocateResponse, error) - // PreStartContainer is called, if indicated by Device Plugin during registeration phase, + // PreStartContainer is called, if indicated by Device Plugin during registration phase, // before each container start. 
Device plugin can run device specific operations // such as resetting the device before making devices available to the container PreStartContainer(context.Context, *PreStartContainerRequest) (*PreStartContainerResponse, error) @@ -1678,7 +1678,7 @@ func (plugin *NvidiaDevicePlugin) WatchAndRegister() { errorSleepInterval := time.Second * 5 successSleepInterval := time.Second * 30 for { - err := plugin.RegistrInAnnotation() + err := plugin.RegisterInAnnotation() if err != nil { klog.Errorf("Failed to register annotation: %v", err) klog.Infof("Retrying in %v seconds...", errorSleepInterval) @@ -1692,7 +1692,7 @@ func (plugin *NvidiaDevicePlugin) WatchAndRegister() { ``` ```golang -func (plugin *NvidiaDevicePlugin) RegistrInAnnotation() error { +func (plugin *NvidiaDevicePlugin) RegisterInAnnotation() error { devices := plugin.getAPIDevices() klog.InfoS("start working on the devices", "devices", devices) annos := make(map[string]string) diff --git a/changelog/source/v2.5.0.md b/changelog/source/v2.5.0.md index 9904999..f56d309 100644 --- a/changelog/source/v2.5.0.md +++ b/changelog/source/v2.5.0.md @@ -66,7 +66,7 @@ authors: - add ut for ascend by ([@shijinye](https://github.com/shijinye)) in [#664](https://github.com/Project-HAMi/HAMi/pull/664) - optimization map init in test by ([@lengrongfu](https://github.com/lengrongfu)) in [#678](https://github.com/Project-HAMi/HAMi/pull/678) - Optimize monitor by ([@for800000](https://github.com/for800000)) in [#683](https://github.com/Project-HAMi/HAMi/pull/683) -- fix code lint faild by ([@lengrongfu](https://github.com/lengrongfu)) in [#685](https://github.com/Project-HAMi/HAMi/pull/685) +- fix code lint failed by ([@lengrongfu](https://github.com/lengrongfu)) in [#685](https://github.com/Project-HAMi/HAMi/pull/685) - fix(helm): Add NODE_NAME env var to the vgpu-monitor container from spec.nodeName by ([@Nimbus318](https://github.com/Nimbus318)) in [#687](https://github.com/Project-HAMi/HAMi/pull/687) - fix vGPUmonitor deviceidx is always 0 by ([@lengrongfu](https://github.com/lengrongfu)) in [#684](https://github.com/Project-HAMi/HAMi/pull/684) - add ut for pkg/scheduler/event.go by ([@Penguin-zlh](https://github.com/Penguin-zlh)) in [#688](https://github.com/Project-HAMi/HAMi/pull/688) @@ -160,4 +160,4 @@ authors: - phoenixwu0229 ([@phoenixwu0229](https://github.com/phoenixwu0229)) - chinaran ([@chinaran](https://github.com/chinaran)) -**Full Changelog**: https://github.com/Project-HAMi/HAMi/compare/v2.4.1...v2.5.0 \ No newline at end of file +**Full Changelog**: https://github.com/Project-HAMi/HAMi/compare/v2.4.1...v2.5.0 diff --git a/docs/contributor/contributers.md b/docs/contributor/contributors.md similarity index 100% rename from docs/contributor/contributers.md rename to docs/contributor/contributors.md diff --git a/docs/contributor/goverance.md b/docs/contributor/governance.md similarity index 99% rename from docs/contributor/goverance.md rename to docs/contributor/governance.md index 86e36fc..629d23e 100644 --- a/docs/contributor/goverance.md +++ b/docs/contributor/governance.md @@ -1,5 +1,5 @@ --- -title: Goverance +title: Governance --- Heterogeneous AI Computing Virtualization Middleware (HAMi), formerly known as k8s-vGPU-scheduler, is an "all-in-one" tools designed to manage Heterogeneous AI Computing Devices in a k8s cluster diff --git a/docs/contributor/ladder.md b/docs/contributor/ladder.md index 943bf56..c50d80e 100644 --- a/docs/contributor/ladder.md +++ b/docs/contributor/ladder.md @@ -1,5 +1,5 @@ --- -title: Contributer Ladder 
+title: Contributor Ladder --- This docs different ways to get involved and level up within the project. You can see different roles within the project in the contributor roles. diff --git a/docs/developers/Dynamic-mig.md b/docs/developers/Dynamic-mig.md index 4f39745..3111f34 100644 --- a/docs/developers/Dynamic-mig.md +++ b/docs/developers/Dynamic-mig.md @@ -19,7 +19,7 @@ HAMi is done by using [hami-core](https://github.com/Project-HAMi/HAMi-core), wh - CPU, Mem, and GPU combined schedule - GPU dynamic slice: Hami-core and MIG - Support node-level binpack and spread by GPU memory, CPU and Mem -A unified vGPU Pool different virtualization technics +- A unified vGPU Pool across different virtualization techniques - Tasks can choose to use MIG, use HAMi-core, or use both. ### Config maps @@ -104,7 +104,7 @@ data: ## Examples -Dynamic mig is compatable with hami tasks, as the example below: +Dynamic mig is compatible with hami tasks, as the example below: Just Setting `nvidia.com/gpu` and `nvidia.com/gpumem`. ```yaml @@ -149,7 +149,7 @@ The Procedure of a vGPU task which uses dynamic-mig is shown below: -Note that after submited a task, deviceshare plugin will iterate over templates defined in configMap `hami-scheduler-device`, and find the first available template to fit. You can always change the content of that configMap, and restart vc-scheduler to customize. +Note that after submitting a task, the deviceshare plugin will iterate over templates defined in configMap `hami-scheduler-device`, and find the first available template to fit. You can always change the content of that configMap, and restart vc-scheduler to customize. If you submit the example on an empty A100-PCIE-40GB node, then it will select a GPU and choose MIG template below: diff --git a/docs/developers/protocol.md b/docs/developers/protocol.md index e9842c0..0c4f420 100644 --- a/docs/developers/protocol.md +++ b/docs/developers/protocol.md @@ -12,10 +12,10 @@ HAMi needs to know the spec of each AI devices in the cluster in order to schedu ``` hami.io/node-handshake-\{device-type\}: Reported_\{device_node_current_timestamp\} -hami.io/node-\{deivce-type\}-register: \{Device 1\}:\{Device2\}:...:\{Device N\} +hami.io/node-\{device-type\}-register: \{Device 1\}:\{Device2\}:...:\{Device N\} ``` -The definiation of each device is in the following format: +The definition of each device is in the following format: ``` \{Device UUID\},\{device split count\},\{device memory limit\},\{device core limit\},\{device type\},\{device numa\},\{healthy\} ``` diff --git a/docs/developers/scheduling.md b/docs/developers/scheduling.md index c033ece..3ea6b6a 100644 --- a/docs/developers/scheduling.md +++ b/docs/developers/scheduling.md @@ -82,7 +82,7 @@ GPU spread, use different GPU cards when possible, egs: ### Node-scheduler-policy -![node-shceduler-policy-demo.png](../resources/node-shceduler-policy-demo.png) +![node-scheduler-policy-demo.png](../resources/node-scheduler-policy-demo.png) #### Binpack @@ -166,4 +166,4 @@ GPU1 Score: ((20+10)/100 + (1000+2000)/8000)) * 10 = 6.75 GPU2 Score: ((20+70)/100 + (1000+6000)/8000)) * 10 = 17.75 ``` -So, in `Spread` policy we can select `GPU1`. \ No newline at end of file +So, in `Spread` policy we can select `GPU1`. 
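The scheduling.md hunk above ends at the worked scoring example. For readers following along: the per-GPU score being computed is `((request.cores + used.cores) / total.cores + (request.mem + used.mem) / total.mem) * 10`, with binpack preferring the highest score and spread the lowest. Below is a minimal Go sketch of that arithmetic; the types, field names, and selection loop are illustrative assumptions, not HAMi's actual scheduler code.

```golang
package main

import "fmt"

// gpu holds the per-device numbers the score formula consumes.
// Field names are illustrative, not HAMi's actual types.
type gpu struct {
	name                string
	usedCore, totalCore float64 // core utilization, in percent
	usedMem, totalMem   float64 // device memory, in MB
}

// score mirrors the formula from the worked example above:
// ((reqCore+usedCore)/totalCore + (reqMem+usedMem)/totalMem) * 10
func score(g gpu, reqCore, reqMem float64) float64 {
	return ((reqCore+g.usedCore)/g.totalCore + (reqMem+g.usedMem)/g.totalMem) * 10
}

func main() {
	// The request shared by both candidates in the example: 20% core, 1000MB memory.
	reqCore, reqMem := 20.0, 1000.0
	gpus := []gpu{
		{name: "GPU1", usedCore: 10, totalCore: 100, usedMem: 2000, totalMem: 8000},
		{name: "GPU2", usedCore: 70, totalCore: 100, usedMem: 6000, totalMem: 8000},
	}
	best := gpus[0]
	for _, g := range gpus {
		fmt.Printf("%s Score: %.2f\n", g.name, score(g, reqCore, reqMem)) // 6.75, 17.75
		// Spread prefers the least-loaded GPU (lowest score);
		// Binpack would take the highest score instead.
		if score(g, reqCore, reqMem) < score(best, reqCore, reqMem) {
			best = g
		}
	}
	fmt.Println("Spread selects:", best.name) // GPU1
}
```

Running the sketch reproduces the 6.75 and 17.75 scores quoted in the hunk, and the spread branch lands on `GPU1`, matching the doc's conclusion.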
diff --git a/docs/get-started/deploy-with-helm.md b/docs/get-started/deploy-with-helm.md index 49b22c9..85171db 100644 --- a/docs/get-started/deploy-with-helm.md +++ b/docs/get-started/deploy-with-helm.md @@ -160,7 +160,7 @@ spec: nvidia.com/gpumem: 10240 # Each vGPU contains 10240m device memory (Optional,Integer) ``` -#### 2. Verify in container resouce control {#verify-in-container-resouce-control} +#### 2. Verify in container resource control {#verify-in-container-resource-control} Execute the following query command: diff --git a/docs/installation/how-to-use-volcano-vgpu.md b/docs/installation/how-to-use-volcano-vgpu.md index 7f91553..65470f8 100644 --- a/docs/installation/how-to-use-volcano-vgpu.md +++ b/docs/installation/how-to-use-volcano-vgpu.md @@ -113,7 +113,7 @@ spec: resources: limits: volcano.sh/vgpu-number: 2 # requesting 2 gpu cards - volcano.sh/vgpu-memory: 3000 # (optinal)each vGPU uses 3G device memory + volcano.sh/vgpu-memory: 3000 # (optional)each vGPU uses 3G device memory volcano.sh/vgpu-cores: 50 # (optional)each vGPU uses 50% core EOF ``` diff --git a/docs/installation/offline-installation.md b/docs/installation/offline-installation.md index 19f3647..3f5e7ab 100644 --- a/docs/installation/offline-installation.md +++ b/docs/installation/offline-installation.md @@ -21,8 +21,8 @@ Load the images, tag them with your internal registry, and push them to your reg docker load -i {HAMi_image}.tar docker tag projecthami/hami:{HAMi version} {your_inner_registry}/hami:{HAMi version} docker push {your_inner_registry}/hami:{HAMi version} -docker tag docker.io/jettech/kube-webhook-certgen:v1.5.2 {your inner_regisry}/kube-webhook-certgen:v1.5.2 -docker push {your inner_regisry}/kube-webhook-certgen:v1.5.2 +docker tag docker.io/jettech/kube-webhook-certgen:v1.5.2 {your_inner_registry}/kube-webhook-certgen:v1.5.2 +docker push {your_inner_registry}/kube-webhook-certgen:v1.5.2 docker tag liangjw/kube-webhook-certgen:v1.1.1 {your_inner_registry}/kube-webhook-certgen:v1.1.1 docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:{your kubernetes version} {your_inner_registry}/kube-scheduler:{your kubernetes version} docker push {your_inner_registry}/kube-scheduler:{your kubernetes version} @@ -31,7 +31,7 @@ docker push {your_inner_registry}/kube-scheduler:{your kubernetes version} ## Prepare HAMi chart Download the charts folder from [github](https://github.com/Project-HAMi/HAMi/tree/master/charts), -place it into $\{CHART_PATH\} inside cluser, then edit the following fields in $\{CHART_PATH\}/hami/values.yaml. +place it into $\{CHART_PATH\} inside cluster, then edit the following fields in $\{CHART_PATH\}/hami/values.yaml. ```yaml scheduler: diff --git a/docs/key-features/device-resource-isolation.md b/docs/key-features/device-resource-isolation.md index feb747e..cf966d1 100644 --- a/docs/key-features/device-resource-isolation.md +++ b/docs/key-features/device-resource-isolation.md @@ -2,7 +2,7 @@ title: Device resource isolation --- -A simple demostration for device isolation: +A simple demonstration for device isolation: A task with the following resources. 
``` diff --git a/docs/releases.md b/docs/releases.md index 150ef87..43b122a 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -79,7 +79,7 @@ Hence, if an issue is important it is important to advocate its priority early i -``` --> \ No newline at end of file +``` --> diff --git a/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_ca.sh b/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_ca.sh index 519b735..7b3cf6d 100755 --- a/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_ca.sh +++ b/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_ca.sh @@ -1,6 +1,6 @@ #!/bin/bash -# genererate front-proxy-ca, server-ca +# generate front-proxy-ca, server-ca set -e set -o pipefail @@ -23,4 +23,4 @@ function main() { gen_front_proxy_ca } -main "$@" \ No newline at end of file +main "$@" diff --git a/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_etcd.sh b/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_etcd.sh index 7c82b46..7fcce7f 100755 --- a/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_etcd.sh +++ b/versioned_docs/version-v2.5.0/resources/installation/install-binary/generate_cert/generate_etcd.sh @@ -1,6 +1,6 @@ #!/bin/bash -# genererate CA & leaf certificates of etcd. +# generate CA & leaf certificates of etcd. set -e set -o pipefail @@ -45,4 +45,4 @@ function main() { generate_leaf_certs } -main "$@" \ No newline at end of file +main "$@" diff --git a/versioned_docs/version-v2.5.0/resources/node-shceduler-policy-demo.png b/versioned_docs/version-v2.5.0/resources/node-scheduler-policy-demo.png similarity index 100% rename from versioned_docs/version-v2.5.0/resources/node-shceduler-policy-demo.png rename to versioned_docs/version-v2.5.0/resources/node-scheduler-policy-demo.png diff --git a/versioned_docs/version-v2.5.0/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md b/versioned_docs/version-v2.5.0/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md index 86b850a..ae498ab 100644 --- a/versioned_docs/version-v2.5.0/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md +++ b/versioned_docs/version-v2.5.0/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md @@ -22,7 +22,7 @@ title: Enable cambricon MLU sharing ## Enabling MLU-sharing Support -* Contact your device provider to aquire cambricon-device-plugin>2.0.9, edit parameter `mode` to 'dynamic-smlu` in containers.args field. +* Contact your device provider to acquire cambricon-device-plugin>2.0.9, edit parameter `mode` to `dynamic-smlu` in the containers.args field. 
``` args: @@ -63,4 +63,4 @@ spec: cambricon.com/mlu.smlu.vcore: 10 # each MLU requesting 10% MLU device core ``` -> **NOTICE:** *`vmemory` and `vcore` can only work when `cambricon.com/mlunum=1`* \ No newline at end of file +> **NOTICE:** *`vmemory` and `vcore` can only work when `cambricon.com/mlunum=1`* diff --git a/versioned_docs/version-v2.5.0/userguide/Enflame-device/enable-enflame-gpu-sharing.md b/versioned_docs/version-v2.5.0/userguide/Enflame-device/enable-enflame-gpu-sharing.md index 8d73123..4761c98 100644 --- a/versioned_docs/version-v2.5.0/userguide/Enflame-device/enable-enflame-gpu-sharing.md +++ b/versioned_docs/version-v2.5.0/userguide/Enflame-device/enable-enflame-gpu-sharing.md @@ -23,7 +23,7 @@ title: Enable Enflame GCU sharing ## Enabling GCU-sharing Support -* Deploy gcushare-device-plugin on enflame nodes (Please consult your device provider to aquire its package and document) +* Deploy gcushare-device-plugin on enflame nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *Install only gpushare-device-plugin, don't install gpu-scheduler-plugin package.* @@ -121,4 +121,4 @@ Look for annotations containing device information in the node status. 2. Multiple GCU allocation in one container is not supported yet -3. `efsmi` inside container shows the total device memory, which is NOT a bug, device memory will be properly limited when running tasks. \ No newline at end of file +3. `efsmi` inside container shows the total device memory, which is NOT a bug, device memory will be properly limited when running tasks. diff --git a/versioned_docs/version-v2.5.0/userguide/Enflame-device/specify-device-slice.md b/versioned_docs/version-v2.5.0/userguide/Enflame-device/specify-device-slice.md index 1ea085e..0b56016 100644 --- a/versioned_docs/version-v2.5.0/userguide/Enflame-device/specify-device-slice.md +++ b/versioned_docs/version-v2.5.0/userguide/Enflame-device/specify-device-slice.md @@ -4,11 +4,11 @@ title: Allocate Enflame GCU slice ## Allocate a portion of GCU -Allocate a portion of device memory by specify resources `enflame.com/vgcu` and `enflame.com/vgcu-percentage`. Each unit of `enflame.com/vgcu-percentage` equals to 1% devie memory and computing cores. +Allocate a portion of device memory by specifying the resources `enflame.com/vgcu` and `enflame.com/vgcu-percentage`. Each unit of `enflame.com/vgcu-percentage` equals 1% of device memory and computing cores. ``` resources: limits: enflame.com/vgcu: 1 # requesting 1 GCU enflame.com/vgcu-percentage: 32 # Each GPU contains 32% device memory and cores -``` \ No newline at end of file +``` diff --git a/versioned_docs/version-v2.5.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md b/versioned_docs/version-v2.5.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md index f667870..a90f408 100644 --- a/versioned_docs/version-v2.5.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md +++ b/versioned_docs/version-v2.5.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md @@ -78,4 +78,4 @@ Launch your DCU tasks like you usually do 2. DCU-sharing in init container is not supported, pods with "hygon.com/dcumem" in init container will never be scheduled. -3. Only one vdcu can be aquired per container. If you want to mount multiple dcu devices, then you shouldn't set `hygon.com/dcumem` or `hygon.com/dcucores` +3. Only one vdcu can be acquired per container. 
If you want to mount multiple dcu devices, then you shouldn't set `hygon.com/dcumem` or `hygon.com/dcucores` diff --git a/versioned_docs/version-v2.5.0/userguide/Iluvatar-device/enable-iluvatar-gpu-sharing.md b/versioned_docs/version-v2.5.0/userguide/Iluvatar-device/enable-iluvatar-gpu-sharing.md index c8ddebe..615adcc 100644 --- a/versioned_docs/version-v2.5.0/userguide/Iluvatar-device/enable-iluvatar-gpu-sharing.md +++ b/versioned_docs/version-v2.5.0/userguide/Iluvatar-device/enable-iluvatar-gpu-sharing.md @@ -23,7 +23,7 @@ title: Enable Iluvatar GCU sharing ## Enabling GPU-sharing Support -* Deploy gpu-manager on iluvatar nodes (Please consult your device provider to aquire its package and document) +* Deploy gpu-manager on iluvatar nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *Install only gpu-manager, don't install gpu-admission package.* @@ -114,4 +114,4 @@ spec: 3. The `iluvatar.ai/vcuda-memory` resource is only effective when `iluvatar.ai/vgpu=1`. -4. Multi-device requests (`iluvatar.ai/vgpu > 1`) do not support vGPU mode. \ No newline at end of file +4. Multi-device requests (`iluvatar.ai/vgpu > 1`) do not support vGPU mode. diff --git a/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-schedule.md b/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-schedule.md index 46d8e8b..0d90f77 100644 --- a/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-schedule.md +++ b/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-schedule.md @@ -9,7 +9,7 @@ When multiple GPUs are configured on a single server, the GPU cards are connecte ![img](../../resources/metax_topo.jpg) -A user job requests a certain number of metax-tech.com/gpu resources, Kubernetes schedule pods to the appropriate node. gpu-device further processes the logic of allocating the remaining resources on the resource node following criterias below: +A user job requests a certain number of metax-tech.com/gpu resources, and Kubernetes schedules pods to the appropriate node. gpu-device further processes the logic of allocating the remaining resources on the resource node following the criteria below: 1. MetaXLink takes precedence over PCIe Switch in two way: – A connection is considered a MetaXLink connection when there is a MetaXLink connection and a PCIe Switch connection between the two cards. – When both the MetaXLink and the PCIe Switch can meet the job request @@ -36,7 +36,7 @@ Equipped with MetaXLink interconnected resources. 
## Enabling topo-awareness scheduling -* Deploy Metax GPU Extensions on metax nodes (Please consult your device provider to aquire its package and document) +* Deploy Metax GPU Extensions on metax nodes (Please consult your device provider to acquire its package and document) * Deploy HAMi according to README.md @@ -64,4 +64,4 @@ spec: > **NOTICE2:** *You can find more examples in examples folder - \ No newline at end of file + diff --git a/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-sharing.md b/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-sharing.md index 04069b7..479c454 100644 --- a/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-sharing.md +++ b/versioned_docs/version-v2.5.0/userguide/Metax-device/enable-metax-gpu-sharing.md @@ -27,7 +27,7 @@ device-sharing features include the following: ### Enabling GPU-sharing Support -* Deploy Metax GPU Operator on metax nodes (Please consult your device provider to aquire its package and document) +* Deploy Metax GPU Operator on metax nodes (Please consult your device provider to acquire its package and document) * Deploy HAMi according to README.md @@ -54,4 +54,4 @@ spec: metax-tech.com/vmemory: 4 # each GPU require 4 GiB device memory ``` -> **NOTICE1:** *You can find more examples in examples/sgpu folder* \ No newline at end of file +> **NOTICE1:** *You can find more examples in examples/sgpu folder* diff --git a/versioned_docs/version-v2.5.0/userguide/Metax-device/examples/gpu/allocate-binpack.md b/versioned_docs/version-v2.5.0/userguide/Metax-device/examples/gpu/allocate-binpack.md index 902a490..62f77fb 100644 --- a/versioned_docs/version-v2.5.0/userguide/Metax-device/examples/gpu/allocate-binpack.md +++ b/versioned_docs/version-v2.5.0/userguide/Metax-device/examples/gpu/allocate-binpack.md @@ -4,7 +4,7 @@ title: Binpack schedule policy ## Allocate metax device using binpack schedule policy -To allocate metax device with mininum damage to topology, you need to only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` +To allocate a metax device with minimum damage to topology, you need only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` ``` apiVersion: v1 @@ -22,4 +22,4 @@ spec: resources: limits: metax-tech.com/gpu: 1 # requesting 1 metax GPU -``` \ No newline at end of file +``` diff --git a/versioned_docs/version-v2.5.0/userguide/Metax-device/specify-binpack-task.md b/versioned_docs/version-v2.5.0/userguide/Metax-device/specify-binpack-task.md index f9c56fe..1900d49 100644 --- a/versioned_docs/version-v2.5.0/userguide/Metax-device/specify-binpack-task.md +++ b/versioned_docs/version-v2.5.0/userguide/Metax-device/specify-binpack-task.md @@ -4,7 +4,7 @@ title: Binpack schedule policy ## Set schedule policy to binpack -To allocate metax device with mininum damage to topology, you need to only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` +To allocate a metax device with minimum damage to topology, you need only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` ``` metadata: diff --git a/versioned_docs/version-v2.5.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md b/versioned_docs/version-v2.5.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md index 009a61f..9942c98 100644 --- a/versioned_docs/version-v2.5.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md +++ 
b/versioned_docs/version-v2.5.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md @@ -29,7 +29,7 @@ title: Enable Mthreads GPU sharing ## Enabling GPU-sharing Support -* Deploy MT-CloudNative Toolkit on mthreads nodes (Please consult your device provider to aquire its package and document) +* Deploy MT-CloudNative Toolkit on mthreads nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *You can remove mt-mutating-webhook and mt-gpu-scheduler after installation(optional).* @@ -68,4 +68,4 @@ spec: > **NOTICE2:** *You can find more examples in examples folder* - \ No newline at end of file + diff --git a/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/dynamic-mig-support.md b/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/dynamic-mig-support.md index 492c1b6..4a25900 100644 --- a/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/dynamic-mig-support.md +++ b/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/dynamic-mig-support.md @@ -8,11 +8,11 @@ title: Enable dynamic-mig feature ***Dynamic MIG instance management***: User don't need to operate on GPU node, using 'nvidia-smi -i 0 -mig 1' or other command to manage MIG instance, all will be done by HAMi-device-plugin. -***Dynamic MIG Adjustment***: Each MIG device managed by HAMi will dyamically adjust their MIG template according to tasks submitted when necessary. +***Dynamic MIG Adjustment***: Each MIG device managed by HAMi will dynamically adjust its MIG template according to the tasks submitted, when necessary. ***Device MIG Observation***: Each MIG instance generated by HAMi will be shown in scheduler-monitor, including task information. user can get a clear overview of MIG nodes. -***Compatable with HAMi-core nodes***: HAMi can manage a unified GPU pool of `HAMi-core node` and `mig node`. A task can be scheduled to either node if not appointed manually by using `nvidia.com/vgpu-mode` annotation. +***Compatible with HAMi-core nodes***: HAMi can manage a unified GPU pool of `HAMi-core node` and `mig node`. A task can be scheduled to either node type unless manually assigned with the `nvidia.com/vgpu-mode` annotation. ***Unified API with HAMi-core***: Zero work needs to be done to make the job compatible with dynamic-mig feature. @@ -178,4 +178,4 @@ nodeGPUMigInstance{deviceidx="1",deviceuuid="GPU-30f90f49-43ab-0a78-bf5c-93ed41e 2. Nvidia devices before Ampere architect can't use 'mig' mode -3. You won't see any mig resources(ie, `nvidia.com/mig-1g.10gb`) on node, hami uses a unified resource name for both 'mig' and 'hami-core' node \ No newline at end of file +3. 
You won't see any mig resources (i.e., `nvidia.com/mig-1g.10gb`) on the node; hami uses a unified resource name for both 'mig' and 'hami-core' nodes diff --git a/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md b/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md index ce0bef0..397e984 100644 --- a/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md +++ b/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md @@ -24,4 +24,4 @@ spec: nvidia.com/gpu: 2 # requesting 2 vGPUs ``` -> **NOTICE:** * You can assign this task to multiple GPU types, use comma to seperate,In this example, we want to run this job on A100 or V100* \ No newline at end of file +> **NOTICE:** * You can assign this task to multiple GPU types, use comma to separate. In this example, we want to run this job on A100 or V100* diff --git a/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/specify-device-type-to-use.md b/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/specify-device-type-to-use.md index 0ec756e..3d3eef6 100644 --- a/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/specify-device-type-to-use.md +++ b/versioned_docs/version-v2.5.0/userguide/NVIDIA-device/specify-device-type-to-use.md @@ -11,7 +11,7 @@ For example, a task with the following annotation will be assigned to A100 or V1 ```yaml metadata: annotations: - nvidia.com/use-gputype: "A100,V100" # Specify the card type for this job, use comma to seperate, will not launch job on non-specified card + nvidia.com/use-gputype: "A100,V100" # Specify the card type for this job, use comma to separate, will not launch job on non-specified card ``` A task may use `nvidia.com/nouse-gputype` to evade certain type of GPU. In this following example, that job won't be assigned to 1080(include 1080Ti) or 2080(include 2080Ti) type of card. @@ -19,5 +19,5 @@ A task may use `nvidia.com/nouse-gputype` to evade certain type of GPU. In this ```yaml metadata: annotations: - nvidia.com/nouse-gputype: "1080,2080" # Specify the blacklist card type for this job, use comma to seperate, will not launch job on specified card + nvidia.com/nouse-gputype: "1080,2080" # Specify the blacklist card type for this job, use comma to separate, will not launch job on specified card ``` diff --git a/versioned_docs/version-v2.5.0/userguide/configure.md b/versioned_docs/version-v2.5.0/userguide/configure.md index 1ab1d04..8e3a34f 100644 --- a/versioned_docs/version-v2.5.0/userguide/configure.md +++ b/versioned_docs/version-v2.5.0/userguide/configure.md @@ -18,11 +18,11 @@ You can update these configurations using one of the following methods: 2. Modify Helm Chart: Update the corresponding values in the [ConfigMap](https://raw.githubusercontent.com/archlitchi/HAMi/refs/heads/master/charts/hami/templates/scheduler/device-configmap.yaml), then reapply the Helm Chart to regenerate the ConfigMap. * `nvidia.deviceMemoryScaling:` - Float type, by default: 1. The ratio for NVIDIA device memory scaling, can be greater than 1 (enable virtual device memory, experimental feature). For NVIDIA GPU with *M* memory, if we set `nvidia.deviceMemoryScaling` argument to *S*, vGPUs splitted by this GPU will totally get `S * M` memory in Kubernetes with our device plugin. + Float type, by default: 1. The ratio for NVIDIA device memory scaling, can be greater than 1 (enable virtual device memory, experimental feature). 
For NVIDIA GPU with *M* memory, if we set `nvidia.deviceMemoryScaling` argument to *S*, vGPUs split from this GPU will get a total of `S * M` memory in Kubernetes with our device plugin. * `nvidia.deviceSplitCount:` Integer type, by default: equals 10. Maximum tasks assigned to a simple GPU device. * `nvidia.migstrategy:` - String type, "none" for ignoring MIG features or "mixed" for allocating MIG device by seperate resources. Default "none" + String type, "none" for ignoring MIG features or "mixed" for allocating MIG device by separate resources. Default "none" * `nvidia.disablecorelimit:` String type, "true" for disable core limit, "false" for enable core limit, default: false * `nvidia.defaultMem:` @@ -87,13 +87,13 @@ helm install hami hami-charts/hami --set devicePlugin.deviceMemoryScaling=5 ... * `GPU_CORE_UTILIZATION_POLICY:` String type, "default", "force", "disable" default: "default" - "default" means the dafault utilization policy + "default" means the default utilization policy "force" means the container will always limit the core utilization below "nvidia.com/gpucores" "disable" means the container will ignore the utilization limitation set by "nvidia.com/gpucores" during task execution * `CUDA_DISABLE_CONTROL` Bool type, "true","false" default: false - "true" means the HAMi-core will not be used inside container, as a result, there will be no resource isolation and limitaion in that container, only for debug. + "true" means the HAMi-core will not be used inside the container; as a result, there will be no resource isolation and limitation in that container. Use only for debugging. - \ No newline at end of file + diff --git a/versioned_docs/version-v2.5.0/userguide/monitoring/real-time-device-usage.md b/versioned_docs/version-v2.5.0/userguide/monitoring/real-time-device-usage.md index 66ec3ab..230e5b1 100644 --- a/versioned_docs/version-v2.5.0/userguide/monitoring/real-time-device-usage.md +++ b/versioned_docs/version-v2.5.0/userguide/monitoring/real-time-device-usage.md @@ -14,9 +14,9 @@ It contains the following metrics: | Metrics | Description | Example | |----------|-------------|---------| -| Device_memory_desc_of_container | Container device meory real-time usage | `{context="0",ctrname="2-1-3-pod-1",data="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",module="0",offset="0",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | +| Device_memory_desc_of_container | Container device memory real-time usage | `{context="0",ctrname="2-1-3-pod-1",data="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",module="0",offset="0",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | | Device_utilization_desc_of_containert | Container device real-time utilization | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | | HostCoreUtilization | GPU real-time utilization on host | `{deviceidx="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",zone="vGPU"}` 0 | | HostGPUMemoryUsage | GPU real-time device memory usage on host | `{deviceidx="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",zone="vGPU"}` 2.87244288e+08 | | vGPU_device_memory_limit_in_bytes | device limit for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 2.62144e+09 | -| vGPU_device_memory_usage_in_bytes | device usage for a certain container 
| `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | \ No newline at end of file +| vGPU_device_memory_usage_in_bytes | device usage for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | diff --git a/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/examples/use_exclusive_gpu.md b/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/examples/use_exclusive_gpu.md index 700597e..21feb72 100644 --- a/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/examples/use_exclusive_gpu.md +++ b/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/examples/use_exclusive_gpu.md @@ -4,7 +4,7 @@ title: Exclusive gpu usage ## Job description -To allocate an exlusive GPU, you need only assign `volcano.sh/vgpu-number` without any other `volcano.sh/xxx` fields, as the example below: +To allocate an exclusive GPU, you need only assign `volcano.sh/vgpu-number` without any other `volcano.sh/xxx` fields, as the example below: ```yaml apiVersion: v1 diff --git a/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/how-to-use-volcano-vgpu.md b/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/how-to-use-volcano-vgpu.md index 23cee22..02815df 100644 --- a/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/how-to-use-volcano-vgpu.md +++ b/versioned_docs/version-v2.5.0/userguide/volcano-vgpu/NVIDIA GPU/how-to-use-volcano-vgpu.md @@ -108,7 +108,7 @@ spec: resources: limits: volcano.sh/vgpu-number: 2 # requesting 2 gpu cards - volcano.sh/vgpu-memory: 3000 # (optinal)each vGPU uses 3G device memory + volcano.sh/vgpu-memory: 3000 # (optional)each vGPU uses 3G device memory volcano.sh/vgpu-cores: 50 # (optional)each vGPU uses 50% core EOF ``` diff --git a/versioned_docs/version-v2.5.1/contributor/goverance.md b/versioned_docs/version-v2.5.1/contributor/goverance.md deleted file mode 100644 index 86e36fc..0000000 --- a/versioned_docs/version-v2.5.1/contributor/goverance.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Goverance ---- - -Heterogeneous AI Computing Virtualization Middleware (HAMi), formerly known as k8s-vGPU-scheduler, is an "all-in-one" tools designed to manage Heterogeneous AI Computing Devices in a k8s cluster - -- [HAMi Project Governance](#hami-project-governance) - - [Values](#values) - - [Membership](#membership) - - [Meetings](#meetings) - - [Code of Conduct](#code-of-conduct) - - [Modifying this Charter](#modifying-this-charter) - -## Values - -The HAMi and its leadership embrace the following values: - -* Openness: Communication and decision-making happens in the open and is discoverable for future - reference. As much as possible, all discussions and work take place in public - forums and open repositories. - -* Fairness: All stakeholders have the opportunity to provide feedback and submit - contributions, which will be considered on their merits. - -* Community over Product or Company: Sustaining and growing our community takes - priority over shipping code or sponsors' organizational goals. Each - contributor participates in the project as an individual. - -* Inclusivity: We innovate through different perspectives and skill sets, which - can only be accomplished in a welcoming and respectful environment. 
- -* Participation: Responsibilities within the project are earned through - participation, and there is a clear path up the contributor ladder into leadership - positions. - -## Membership - -Currently, the maintainers are the governing body for the project. This may -change as the community grows, such as by adopting an elected steering committee. - -## Meetings - -Time zones permitting, Maintainers are expected to participate in the public -developer meeting, which occurs -[Google Docs](https://docs.google.com/document/d/1YC6hco03_oXbF9IOUPJ29VWEddmITIKIfSmBX8JtGBw/edit). - -Maintainers will also have closed meetings in order to discuss security reports -or Code of Conduct violations. Such meetings should be scheduled by any -Maintainer on receipt of a security issue or CoC report. All current Maintainers -must be invited to such closed meetings, except for any Maintainer who is -accused of a CoC violation. - -## Code of Conduct - -[Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md) -violations by community members will be referred to the CNCF Code of Conduct -Committee. Should the CNCF CoC Committee need to work with the project on resolution, the -Maintainers will appoint a non-involved contributor to work with them. - -## Modifying this Charter - -Changes to this Governance and its supporting documents may be approved by -a 2/3 vote of the Maintainers. \ No newline at end of file diff --git a/versioned_docs/version-v2.5.1/contributor/governance.md b/versioned_docs/version-v2.5.1/contributor/governance.md new file mode 100644 index 0000000..e36e468 --- /dev/null +++ b/versioned_docs/version-v2.5.1/contributor/governance.md @@ -0,0 +1,63 @@ +--- +title: Governance +--- + +Heterogeneous AI Computing Virtualization Middleware (HAMi), formerly known as k8s-vGPU-scheduler, is an "all-in-one" tool designed to manage Heterogeneous AI Computing Devices in a k8s cluster + +- [HAMi Project Governance](#hami-project-governance) + - [Values](#values) + - [Membership](#membership) + - [Meetings](#meetings) + - [Code of Conduct](#code-of-conduct) + - [Modifying this Charter](#modifying-this-charter) + +## Values + +The HAMi and its leadership embrace the following values: + +* Openness: Communication and decision-making happens in the open and is discoverable for future + reference. As much as possible, all discussions and work take place in public + forums and open repositories. + +* Fairness: All stakeholders have the opportunity to provide feedback and submit + contributions, which will be considered on their merits. + +* Community over Product or Company: Sustaining and growing our community takes + priority over shipping code or sponsors' organizational goals. Each + contributor participates in the project as an individual. + +* Inclusivity: We innovate through different perspectives and skill sets, which + can only be accomplished in a welcoming and respectful environment. + +* Participation: Responsibilities within the project are earned through + participation, and there is a clear path up the contributor ladder into leadership + positions. + +## Membership + +Currently, the maintainers are the governing body for the project. This may +change as the community grows, such as by adopting an elected steering committee. + +## Meetings + +Time zones permitting, Maintainers are expected to participate in the public +developer meeting, which occurs +[Google Docs](https://docs.google.com/document/d/1YC6hco03_oXbF9IOUPJ29VWEddmITIKIfSmBX8JtGBw/edit).
+ +Maintainers will also have closed meetings in order to discuss security reports +or Code of Conduct violations. Such meetings should be scheduled by any +Maintainer on receipt of a security issue or CoC report. All current Maintainers +must be invited to such closed meetings, except for any Maintainer who is +accused of a CoC violation. + +## Code of Conduct + +[Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md) +violations by community members will be referred to the CNCF Code of Conduct +Committee. Should the CNCF CoC Committee need to work with the project on resolution, the +Maintainers will appoint a non-involved contributor to work with them. + +## Modifying this Charter + +Changes to this Governance and its supporting documents may be approved by +a 2/3 vote of the Maintainers. diff --git a/versioned_docs/version-v2.5.1/contributor/ladder.md b/versioned_docs/version-v2.5.1/contributor/ladder.md index 943bf56..0ed9790 100644 --- a/versioned_docs/version-v2.5.1/contributor/ladder.md +++ b/versioned_docs/version-v2.5.1/contributor/ladder.md @@ -1,5 +1,5 @@ --- -title: Contributer Ladder +title: Contributor Ladder --- This docs different ways to get involved and level up within the project. You can see different roles within the project in the contributor roles. @@ -181,4 +181,4 @@ Involuntary removal/demotion of a contributor happens when responsibilities and Involuntary removal or demotion is handled through a vote by a majority of the current Maintainers. -[two-factor authentication]: https://help.github.com/articles/about-two-factor-authentication \ No newline at end of file +[two-factor authentication]: https://help.github.com/articles/about-two-factor-authentication diff --git a/versioned_docs/version-v2.5.1/developers/Dynamic-mig.md b/versioned_docs/version-v2.5.1/developers/Dynamic-mig.md index 139e3ac..3111f34 100644 --- a/versioned_docs/version-v2.5.1/developers/Dynamic-mig.md +++ b/versioned_docs/version-v2.5.1/developers/Dynamic-mig.md @@ -19,7 +19,7 @@ HAMi is done by using [hami-core](https://github.com/Project-HAMi/HAMi-core), wh - CPU, Mem, and GPU combined schedule - GPU dynamic slice: Hami-core and MIG - Support node-level binpack and spread by GPU memory, CPU and Mem -A unified vGPU Pool different virtualization technics +- A unified vGPU Pool across different virtualization techniques - Tasks can choose to use MIG, use HAMi-core, or use both. ### Config maps @@ -104,7 +104,7 @@ data: ## Examples -Dynamic mig is compatable with hami tasks, as the example below: +Dynamic mig is compatible with hami tasks, as the example below: Just Setting `nvidia.com/gpu` and `nvidia.com/gpumem`. ```yaml @@ -149,9 +149,9 @@ The Procedure of a vGPU task which uses dynamic-mig is shown below: -Note that after submited a task, deviceshare plugin will iterate over templates defined in configMap `hami-scheduler-device`, and find the first available template to fit. You can always change the content of that configMap, and restart vc-scheduler to customize. +Note that after submitting a task, the deviceshare plugin will iterate over templates defined in configMap `hami-scheduler-device`, and find the first available template to fit. You can always change the content of that configMap, and restart vc-scheduler to customize. 
-If you submit the example on an empty A100-PCIE-40GB node, then it will select a GPU and chosse MIG template below: +If you submit the example on an empty A100-PCIE-40GB node, then it will select a GPU and choose the MIG template below: ```yaml 2g.10gb : 3 diff --git a/versioned_docs/version-v2.5.1/developers/HAMi-core-design.md b/versioned_docs/version-v2.5.1/developers/HAMi-core-design.md index 17155dc..9bd8b22 100644 --- a/versioned_docs/version-v2.5.1/developers/HAMi-core-design.md +++ b/versioned_docs/version-v2.5.1/developers/HAMi-core-design.md @@ -4,14 +4,14 @@ title: HAMi-core design ## Introduction -HAMi-core is a hook library for CUDA environment, it is the in-container gpu resource controller, it has beed adopted by [HAMi](https://github.com/HAMi-project/HAMi), [volcano](https://github.com/volcano-sh/devices) +HAMi-core is a hook library for the CUDA environment. It is the in-container GPU resource controller, and it has been adopted by [HAMi](https://github.com/HAMi-project/HAMi) and [volcano](https://github.com/volcano-sh/devices) ![img](../resources/hami-arch.png) ## Features HAMi-core has the following features: -1. Virtualize device meory +1. Virtualize device memory ![image](../resources/sample_nvidia-smi.png) diff --git a/versioned_docs/version-v2.5.1/developers/scheduling.md b/versioned_docs/version-v2.5.1/developers/scheduling.md index c033ece..3ea6b6a 100644 --- a/versioned_docs/version-v2.5.1/developers/scheduling.md +++ b/versioned_docs/version-v2.5.1/developers/scheduling.md @@ -82,7 +82,7 @@ GPU spread, use different GPU cards when possible, egs: ### Node-scheduler-policy -![node-shceduler-policy-demo.png](../resources/node-shceduler-policy-demo.png) +![node-scheduler-policy-demo.png](../resources/node-scheduler-policy-demo.png) #### Binpack @@ -166,4 +166,4 @@ GPU1 Score: ((20+10)/100 + (1000+2000)/8000)) * 10 = 6.75 GPU2 Score: ((20+70)/100 + (1000+6000)/8000)) * 10 = 17.75 ``` -So, in `Spread` policy we can select `GPU1`. \ No newline at end of file +So, in `Spread` policy we can select `GPU1`. 
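The Dynamic-mig hunks above describe template selection only in prose: after a task is submitted, the deviceshare plugin walks the MIG templates declared in the `hami-scheduler-device` configMap and takes the first one that fits, e.g. `2g.10gb : 3` on an empty A100-PCIE-40GB. Here is a hedged Go sketch of that first-fit walk; the template struct, the geometry values, and the 8000MB request are assumptions for illustration, not HAMi's real config schema.

```golang
package main

import "fmt"

// migTemplate is a simplified stand-in for one entry of the MIG geometry
// declared in the hami-scheduler-device configMap; the real schema differs.
type migTemplate struct {
	name     string
	memoryMB int // device memory provided by one instance of this slice
	count    int // how many such instances the geometry yields
}

// firstFit walks templates in declared order and returns the first slice
// large enough for the request, as the doc describes.
func firstFit(templates []migTemplate, reqMemMB int) (migTemplate, bool) {
	for _, t := range templates {
		if t.memoryMB >= reqMemMB {
			return t, true
		}
	}
	return migTemplate{}, false
}

func main() {
	// Rough A100-PCIE-40GB-style geometry, for illustration only.
	templates := []migTemplate{
		{name: "1g.5gb", memoryMB: 5120, count: 7},
		{name: "2g.10gb", memoryMB: 10240, count: 3},
		{name: "3g.20gb", memoryMB: 20480, count: 2},
	}
	if t, ok := firstFit(templates, 8000); ok { // hypothetical 8000MB request
		fmt.Printf("selected template %s x%d\n", t.name, t.count) // 2g.10gb x3
	}
}
```

With this list, the request skips `1g.5gb` and lands on `2g.10gb`, matching the `2g.10gb : 3` outcome the doc shows; editing the configMap reorders or replaces the candidates, which is why the doc says to restart vc-scheduler after changing it.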
diff --git a/versioned_docs/version-v2.5.1/get-started/nginx-example.md b/versioned_docs/version-v2.5.1/get-started/nginx-example.md index af65307..2cbd118 100644 --- a/versioned_docs/version-v2.5.1/get-started/nginx-example.md +++ b/versioned_docs/version-v2.5.1/get-started/nginx-example.md @@ -138,7 +138,7 @@ spec: nvidia.com/gpumem: 10240 # Each vGPU contains 10240m device memory (Optional,Integer) ``` -#### Verify in container resouce control +#### Verify in container resource control Execute the following query command: diff --git a/versioned_docs/version-v2.5.1/installation/how-to-use-volcano-vgpu.md b/versioned_docs/version-v2.5.1/installation/how-to-use-volcano-vgpu.md index bbc39b1..95bcc90 100644 --- a/versioned_docs/version-v2.5.1/installation/how-to-use-volcano-vgpu.md +++ b/versioned_docs/version-v2.5.1/installation/how-to-use-volcano-vgpu.md @@ -113,7 +113,7 @@ spec: resources: limits: volcano.sh/vgpu-number: 2 # requesting 2 gpu cards - volcano.sh/vgpu-memory: 3000 # (optinal)each vGPU uses 3G device memory + volcano.sh/vgpu-memory: 3000 # (optional)each vGPU uses 3G device memory volcano.sh/vgpu-cores: 50 # (optional)each vGPU uses 50% core EOF ``` diff --git a/versioned_docs/version-v2.5.1/installation/offline-installation.md b/versioned_docs/version-v2.5.1/installation/offline-installation.md index cb8d41e..bdd7f1f 100644 --- a/versioned_docs/version-v2.5.1/installation/offline-installation.md +++ b/versioned_docs/version-v2.5.1/installation/offline-installation.md @@ -21,8 +21,8 @@ Load the images, tag them with your internal registry, and push them to your reg docker load -i {HAMi_image}.tar docker tag projecthami/hami:{HAMi version} {your_inner_registry}/hami:{HAMi version} docker push {your_inner_registry}/hami:{HAMi version} -docker tag docker.io/jettech/kube-webhook-certgen:v1.5.2 {your inner_regisry}/kube-webhook-certgen:v1.5.2 -docker push {your inner_regisry}/kube-webhook-certgen:v1.5.2 +docker tag docker.io/jettech/kube-webhook-certgen:v1.5.2 {your_inner_registry}/kube-webhook-certgen:v1.5.2 +docker push {your_inner_registry}/kube-webhook-certgen:v1.5.2 docker tag liangjw/kube-webhook-certgen:v1.1.1 {your_inner_registry}/kube-webhook-certgen:v1.1.1 docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:{your kubernetes version} {your_inner_registry}/kube-scheduler:{your kubernetes version} docker push {your_inner_registry}/kube-scheduler:{your kubernetes version} @@ -31,7 +31,7 @@ docker push {your_inner_registry}/kube-scheduler:{your kubernetes version} ## Prepare HAMi chart Download the charts folder from [github](https://github.com/Project-HAMi/HAMi/tree/master/charts), -place it into $\{CHART_PATH\} inside cluser, then edit the following fields in $\{CHART_PATH\}/hami/values.yaml. +place it into $\{CHART_PATH\} inside cluster, then edit the following fields in $\{CHART_PATH\}/hami/values.yaml. ```yaml scheduler: diff --git a/versioned_docs/version-v2.5.1/key-features/device-resource-isolation.md b/versioned_docs/version-v2.5.1/key-features/device-resource-isolation.md index 1b84235..cf966d1 100644 --- a/versioned_docs/version-v2.5.1/key-features/device-resource-isolation.md +++ b/versioned_docs/version-v2.5.1/key-features/device-resource-isolation.md @@ -2,7 +2,7 @@ title: Device resource isolation --- -A simple demostration for device isolation: +A simple demonstration for device isolation: A task with the following resources. ``` @@ -14,4 +14,4 @@ 
will see 3G device memory inside container -![img](../resources/hard_limit.jpg) \ No newline at end of file +![img](../resources/hard_limit.jpg) diff --git a/versioned_docs/version-v2.5.1/releases.md b/versioned_docs/version-v2.5.1/releases.md index 150ef87..e6516f1 100644 --- a/versioned_docs/version-v2.5.1/releases.md +++ b/versioned_docs/version-v2.5.1/releases.md @@ -79,7 +79,7 @@ Hence, if an issue is important it is important to advocate its priority early i -``` --> \ No newline at end of file +``` --> diff --git a/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_ca.sh b/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_ca.sh index 519b735..7b3cf6d 100755 --- a/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_ca.sh +++ b/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_ca.sh @@ -1,6 +1,6 @@ #!/bin/bash -# genererate front-proxy-ca, server-ca +# generate front-proxy-ca, server-ca set -e set -o pipefail @@ -23,4 +23,4 @@ function main() { gen_front_proxy_ca } -main "$@" \ No newline at end of file +main "$@" diff --git a/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_etcd.sh b/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_etcd.sh index 7c82b46..7fcce7f 100755 --- a/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_etcd.sh +++ b/versioned_docs/version-v2.5.1/resources/installation/install-binary/generate_cert/generate_etcd.sh @@ -1,6 +1,6 @@ #!/bin/bash -# genererate CA & leaf certificates of etcd. +# generate CA & leaf certificates of etcd. set -e set -o pipefail @@ -45,4 +45,4 @@ function main() { generate_leaf_certs } -main "$@" \ No newline at end of file +main "$@" diff --git a/versioned_docs/version-v2.5.1/resources/node-shceduler-policy-demo.png b/versioned_docs/version-v2.5.1/resources/node-scheduler-policy-demo.png similarity index 100% rename from versioned_docs/version-v2.5.1/resources/node-shceduler-policy-demo.png rename to versioned_docs/version-v2.5.1/resources/node-scheduler-policy-demo.png diff --git a/versioned_docs/version-v2.5.1/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md b/versioned_docs/version-v2.5.1/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md index 86b850a..ae498ab 100644 --- a/versioned_docs/version-v2.5.1/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md +++ b/versioned_docs/version-v2.5.1/userguide/Cambricon-device/enable-cambricon-mlu-sharing.md @@ -22,7 +22,7 @@ title: Enable cambricon MLU sharing ## Enabling MLU-sharing Support -* Contact your device provider to aquire cambricon-device-plugin>2.0.9, edit parameter `mode` to 'dynamic-smlu` in containers.args field. +* Contact your device provider to acquire cambricon-device-plugin>2.0.9, edit parameter `mode` to `dynamic-smlu` in the containers.args field. 
``` args: @@ -63,4 +63,4 @@ spec: cambricon.com/mlu.smlu.vcore: 10 # each MLU requesting 10% MLU device core ``` -> **NOTICE:** *`vmemory` and `vcore` can only work when `cambricon.com/mlunum=1`* \ No newline at end of file +> **NOTICE:** *`vmemory` and `vcore` can only work when `cambricon.com/mlunum=1`* diff --git a/versioned_docs/version-v2.5.1/userguide/Hygon-device/enable-hygon-dcu-sharing.md b/versioned_docs/version-v2.5.1/userguide/Hygon-device/enable-hygon-dcu-sharing.md index f667870..a90f408 100644 --- a/versioned_docs/version-v2.5.1/userguide/Hygon-device/enable-hygon-dcu-sharing.md +++ b/versioned_docs/version-v2.5.1/userguide/Hygon-device/enable-hygon-dcu-sharing.md @@ -78,4 +78,4 @@ Launch your DCU tasks like you usually do 2. DCU-sharing in init container is not supported, pods with "hygon.com/dcumem" in init container will never be scheduled. -3. Only one vdcu can be aquired per container. If you want to mount multiple dcu devices, then you shouldn't set `hygon.com/dcumem` or `hygon.com/dcucores` +3. Only one vdcu can be acquired per container. If you want to mount multiple dcu devices, then you shouldn't set `hygon.com/dcumem` or `hygon.com/dcucores` diff --git a/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-schedule.md b/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-schedule.md index 46d8e8b..0d90f77 100644 --- a/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-schedule.md +++ b/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-schedule.md @@ -9,7 +9,7 @@ When multiple GPUs are configured on a single server, the GPU cards are connecte ![img](../../resources/metax_topo.jpg) -A user job requests a certain number of metax-tech.com/gpu resources, Kubernetes schedule pods to the appropriate node. gpu-device further processes the logic of allocating the remaining resources on the resource node following criterias below: +A user job requests a certain number of metax-tech.com/gpu resources, and Kubernetes schedules pods to the appropriate node. gpu-device further processes the logic of allocating the remaining resources on the resource node following the criteria below: 1. MetaXLink takes precedence over PCIe Switch in two way: – A connection is considered a MetaXLink connection when there is a MetaXLink connection and a PCIe Switch connection between the two cards. – When both the MetaXLink and the PCIe Switch can meet the job request @@ -36,7 +36,7 @@ Equipped with MetaXLink interconnected resources. 
## Enabling topo-awareness scheduling -* Deploy Metax GPU Extensions on metax nodes (Please consult your device provider to aquire its package and document) +* Deploy Metax GPU Extensions on metax nodes (Please consult your device provider to acquire its package and document) * Deploy HAMi according to README.md @@ -64,4 +64,4 @@ spec: > **NOTICE2:** *You can find more examples in examples folder - \ No newline at end of file + diff --git a/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-sharing.md b/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-sharing.md index 9819419..1604590 100644 --- a/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-sharing.md +++ b/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-sharing.md @@ -18,7 +18,7 @@ title: Enable Metax GPU sharing ## Enabling GPU-sharing Support -* Deploy Metax GPU Operator on metax nodes (Please consult your device provider to aquire its package and document) +* Deploy Metax GPU Operator on metax nodes (Please consult your device provider to acquire its package and document) * Deploy HAMi according to README.md diff --git a/versioned_docs/version-v2.5.1/userguide/Metax-device/examples/allocate-binpack.md b/versioned_docs/version-v2.5.1/userguide/Metax-device/examples/allocate-binpack.md index 902a490..62f77fb 100644 --- a/versioned_docs/version-v2.5.1/userguide/Metax-device/examples/allocate-binpack.md +++ b/versioned_docs/version-v2.5.1/userguide/Metax-device/examples/allocate-binpack.md @@ -4,7 +4,7 @@ title: Binpack schedule policy ## Allocate metax device using binpack schedule policy -To allocate metax device with mininum damage to topology, you need to only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` +To allocate a metax device with minimum damage to topology, you need only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` ``` apiVersion: v1 @@ -22,4 +22,4 @@ spec: resources: limits: metax-tech.com/gpu: 1 # requesting 1 metax GPU -``` \ No newline at end of file +``` diff --git a/versioned_docs/version-v2.5.1/userguide/Metax-device/specify-binpack-task.md b/versioned_docs/version-v2.5.1/userguide/Metax-device/specify-binpack-task.md index f9c56fe..1900d49 100644 --- a/versioned_docs/version-v2.5.1/userguide/Metax-device/specify-binpack-task.md +++ b/versioned_docs/version-v2.5.1/userguide/Metax-device/specify-binpack-task.md @@ -4,7 +4,7 @@ title: Binpack schedule policy ## Set schedule policy to binpack -To allocate metax device with mininum damage to topology, you need to only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` +To allocate a metax device with minimum damage to topology, you need only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy`=`binpack` ``` metadata: diff --git a/versioned_docs/version-v2.5.1/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md b/versioned_docs/version-v2.5.1/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md index 009a61f..9942c98 100644 --- a/versioned_docs/version-v2.5.1/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md +++ b/versioned_docs/version-v2.5.1/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md @@ -29,7 +29,7 @@ title: Enable Mthreads GPU sharing ## Enabling GPU-sharing Support -* Deploy MT-CloudNative Toolkit on mthreads nodes (Please consult your device provider to aquire its package and document) +* Deploy MT-CloudNative
Toolkit on mthreads nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *You can remove mt-mutating-webhook and mt-gpu-scheduler after installation (optional).* @@ -68,4 +68,4 @@ spec: > **NOTICE2:** *You can find more examples in examples folder* - \ No newline at end of file + diff --git a/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/examples/specify-card-type-to-use.md b/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/examples/specify-card-type-to-use.md index ce0bef0..397e984 100644 --- a/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/examples/specify-card-type-to-use.md +++ b/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/examples/specify-card-type-to-use.md @@ -24,4 +24,4 @@ spec: nvidia.com/gpu: 2 # requesting 2 vGPUs ``` -> **NOTICE:** * You can assign this task to multiple GPU types, use comma to seperate,In this example, we want to run this job on A100 or V100* \ No newline at end of file +> **NOTICE:** *You can assign this task to multiple GPU types, use comma to separate. In this example, we want to run this job on A100 or V100* diff --git a/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/specify-device-type-to-use.md b/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/specify-device-type-to-use.md index 0ec756e..3d3eef6 100644 --- a/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/specify-device-type-to-use.md +++ b/versioned_docs/version-v2.5.1/userguide/NVIDIA-device/specify-device-type-to-use.md @@ -11,7 +11,7 @@ For example, a task with the following annotation will be assigned to A100 or V1 ```yaml metadata: annotations: - nvidia.com/use-gputype: "A100,V100" # Specify the card type for this job, use comma to seperate, will not launch job on non-specified card + nvidia.com/use-gputype: "A100,V100" # Specify the card type for this job, use comma to separate, will not launch job on non-specified card ``` A task may use `nvidia.com/nouse-gputype` to evade certain types of GPU. In the following example, the job won't be assigned to a 1080 (including 1080Ti) or 2080 (including 2080Ti) type of card. @@ -19,5 +19,5 @@ A task may use `nvidia.com/nouse-gputype` to evade certain type of GPU.
In this ```yaml metadata: annotations: - nvidia.com/nouse-gputype: "1080,2080" # Specify the blacklist card type for this job, use comma to seperate, will not launch job on specified card + nvidia.com/nouse-gputype: "1080,2080" # Specify the blacklist card type for this job, use comma to separate, will not launch job on specified card ``` diff --git a/versioned_docs/version-v2.5.1/userguide/monitoring/real-time-device-usage.md b/versioned_docs/version-v2.5.1/userguide/monitoring/real-time-device-usage.md index e0ee6a4..78e44d9 100644 --- a/versioned_docs/version-v2.5.1/userguide/monitoring/real-time-device-usage.md +++ b/versioned_docs/version-v2.5.1/userguide/monitoring/real-time-device-usage.md @@ -14,9 +14,9 @@ It contains the following metrics: | Metrics | Description | Example | |----------|-------------|---------| -| Device_memory_desc_of_container | Container device meory real-time usage | `{context="0",ctrname="2-1-3-pod-1",data="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",module="0",offset="0",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | +| Device_memory_desc_of_container | Container device memory real-time usage | `{context="0",ctrname="2-1-3-pod-1",data="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",module="0",offset="0",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | | Device_utilization_desc_of_container | Container device real-time utilization | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | | HostCoreUtilization | GPU real-time utilization on host | `{deviceidx="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",zone="vGPU"}` 0 | | HostGPUMemoryUsage | GPU real-time device memory usage on host | `{deviceidx="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",zone="vGPU"}` 2.87244288e+08 | | vGPU_device_memory_limit_in_bytes | device limit for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 2.62144e+09 | -| vGPU_device_memory_usage_in_bytes | device usage for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | \ No newline at end of file +| vGPU_device_memory_usage_in_bytes | device usage for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | diff --git a/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md b/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md index 700597e..21feb72 100644 --- a/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md +++ b/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md @@ -4,7 +4,7 @@ title: Exclusive gpu usage ## Job description -To allocate an exlusive GPU, you need only assign `volcano.sh/vgpu-number` without any other `volcano.sh/xxx` fields, as the example below: +To allocate an exclusive GPU, you need only assign `volcano.sh/vgpu-number` without any other `volcano.sh/xxx` fields, as in the example below: ```yaml apiVersion: v1 diff --git
a/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md b/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md index 036a9d6..1658ca2 100644 --- a/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md +++ b/versioned_docs/version-v2.5.1/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md @@ -113,7 +113,7 @@ spec: resources: limits: volcano.sh/vgpu-number: 2 # requesting 2 gpu cards - volcano.sh/vgpu-memory: 3000 # (optinal)each vGPU uses 3G device memory + volcano.sh/vgpu-memory: 3000 # (optional) each vGPU uses 3G device memory volcano.sh/vgpu-cores: 50 # (optional) each vGPU uses 50% core EOF ``` diff --git a/versioned_docs/version-v2.6.0/contributor/contributers.md b/versioned_docs/version-v2.6.0/contributor/contributors.md similarity index 100% rename from versioned_docs/version-v2.6.0/contributor/contributers.md rename to versioned_docs/version-v2.6.0/contributor/contributors.md diff --git a/versioned_docs/version-v2.6.0/contributor/goverance.md b/versioned_docs/version-v2.6.0/contributor/goverance.md deleted file mode 100644 index 86e36fc..0000000 --- a/versioned_docs/version-v2.6.0/contributor/goverance.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -title: Goverance ---- - -Heterogeneous AI Computing Virtualization Middleware (HAMi), formerly known as k8s-vGPU-scheduler, is an "all-in-one" tools designed to manage Heterogeneous AI Computing Devices in a k8s cluster - -- [HAMi Project Governance](#hami-project-governance) - - [Values](#values) - - [Membership](#membership) - - [Meetings](#meetings) - - [Code of Conduct](#code-of-conduct) - - [Modifying this Charter](#modifying-this-charter) - -## Values - -The HAMi and its leadership embrace the following values: - -* Openness: Communication and decision-making happens in the open and is discoverable for future - reference. As much as possible, all discussions and work take place in public - forums and open repositories. - -* Fairness: All stakeholders have the opportunity to provide feedback and submit - contributions, which will be considered on their merits. - -* Community over Product or Company: Sustaining and growing our community takes - priority over shipping code or sponsors' organizational goals. Each - contributor participates in the project as an individual. - -* Inclusivity: We innovate through different perspectives and skill sets, which - can only be accomplished in a welcoming and respectful environment. - -* Participation: Responsibilities within the project are earned through - participation, and there is a clear path up the contributor ladder into leadership - positions. - -## Membership - -Currently, the maintainers are the governing body for the project. This may -change as the community grows, such as by adopting an elected steering committee. - -## Meetings - -Time zones permitting, Maintainers are expected to participate in the public -developer meeting, which occurs -[Google Docs](https://docs.google.com/document/d/1YC6hco03_oXbF9IOUPJ29VWEddmITIKIfSmBX8JtGBw/edit). - -Maintainers will also have closed meetings in order to discuss security reports -or Code of Conduct violations. Such meetings should be scheduled by any -Maintainer on receipt of a security issue or CoC report. All current Maintainers -must be invited to such closed meetings, except for any Maintainer who is -accused of a CoC violation.
- -## Code of Conduct - -[Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md) -violations by community members will be referred to the CNCF Code of Conduct -Committee. Should the CNCF CoC Committee need to work with the project on resolution, the -Maintainers will appoint a non-involved contributor to work with them. - -## Modifying this Charter - -Changes to this Governance and its supporting documents may be approved by -a 2/3 vote of the Maintainers. \ No newline at end of file diff --git a/versioned_docs/version-v1.3.0/contributor/goverance.md b/versioned_docs/version-v2.6.0/contributor/governance.md similarity index 99% rename from versioned_docs/version-v1.3.0/contributor/goverance.md rename to versioned_docs/version-v2.6.0/contributor/governance.md index 86e36fc..629d23e 100644 --- a/versioned_docs/version-v1.3.0/contributor/goverance.md +++ b/versioned_docs/version-v2.6.0/contributor/governance.md @@ -1,5 +1,5 @@ --- -title: Goverance +title: Governance --- Heterogeneous AI Computing Virtualization Middleware (HAMi), formerly known as k8s-vGPU-scheduler, is an "all-in-one" tool designed to manage Heterogeneous AI Computing Devices in a k8s cluster diff --git a/versioned_docs/version-v2.6.0/contributor/ladder.md b/versioned_docs/version-v2.6.0/contributor/ladder.md index 943bf56..c50d80e 100644 --- a/versioned_docs/version-v2.6.0/contributor/ladder.md +++ b/versioned_docs/version-v2.6.0/contributor/ladder.md @@ -1,5 +1,5 @@ --- -title: Contributer Ladder +title: Contributor Ladder --- This doc describes different ways to get involved and level up within the project. You can see the different roles within the project in the contributor roles. diff --git a/versioned_docs/version-v2.6.0/developers/Dynamic-mig.md b/versioned_docs/version-v2.6.0/developers/Dynamic-mig.md index 139e3ac..3111f34 100644 --- a/versioned_docs/version-v2.6.0/developers/Dynamic-mig.md +++ b/versioned_docs/version-v2.6.0/developers/Dynamic-mig.md @@ -19,7 +19,7 @@ HAMi is done by using [hami-core](https://github.com/Project-HAMi/HAMi-core), wh - CPU, Mem, and GPU combined schedule - GPU dynamic slice: Hami-core and MIG - Support node-level binpack and spread by GPU memory, CPU and Mem -- A unified vGPU Pool different virtualization technics +- A unified vGPU Pool across different virtualization techniques - Tasks can choose to use MIG, use HAMi-core, or use both. ### Config maps @@ -104,7 +104,7 @@ data: ## Examples -Dynamic mig is compatable with hami tasks, as the example below: +Dynamic mig is compatible with hami tasks, as in the example below: Just set `nvidia.com/gpu` and `nvidia.com/gpumem`. ```yaml @@ -149,9 +149,9 @@ The Procedure of a vGPU task which uses dynamic-mig is shown below: -Note that after submited a task, deviceshare plugin will iterate over templates defined in configMap `hami-scheduler-device`, and find the first available template to fit. You can always change the content of that configMap, and restart vc-scheduler to customize. +Note that after a task is submitted, the deviceshare plugin will iterate over the templates defined in configMap `hami-scheduler-device` and find the first available template that fits. You can always change the content of that configMap and restart vc-scheduler to customize.
-If you submit the example on an empty A100-PCIE-40GB node, then it will select a GPU and chosse MIG template below: +If you submit the example on an empty A100-PCIE-40GB node, then it will select a GPU and choose the MIG template below: ```yaml 2g.10gb : 3 diff --git a/versioned_docs/version-v2.6.0/developers/HAMi-core-design.md b/versioned_docs/version-v2.6.0/developers/HAMi-core-design.md index 17155dc..9bd8b22 100644 --- a/versioned_docs/version-v2.6.0/developers/HAMi-core-design.md +++ b/versioned_docs/version-v2.6.0/developers/HAMi-core-design.md @@ -4,14 +4,14 @@ title: HAMi-core design ## Introduction -HAMi-core is a hook library for CUDA environment, it is the in-container gpu resource controller, it has beed adopted by [HAMi](https://github.com/HAMi-project/HAMi), [volcano](https://github.com/volcano-sh/devices) +HAMi-core is a hook library for the CUDA environment. It is the in-container GPU resource controller and has been adopted by [HAMi](https://github.com/HAMi-project/HAMi) and [volcano](https://github.com/volcano-sh/devices) ![img](../resources/hami-arch.png) ## Features HAMi-core has the following features: -1. Virtualize device meory +1. Virtualize device memory ![image](../resources/sample_nvidia-smi.png) diff --git a/versioned_docs/version-v2.6.0/developers/protocol.md b/versioned_docs/version-v2.6.0/developers/protocol.md index e9842c0..0c4f420 100644 --- a/versioned_docs/version-v2.6.0/developers/protocol.md +++ b/versioned_docs/version-v2.6.0/developers/protocol.md @@ -12,10 +12,10 @@ HAMi needs to know the spec of each AI devices in the cluster in order to schedu ``` hami.io/node-handshake-\{device-type\}: Reported_\{device_node_current_timestamp\} -hami.io/node-\{deivce-type\}-register: \{Device 1\}:\{Device2\}:...:\{Device N\} +hami.io/node-\{device-type\}-register: \{Device 1\}:\{Device2\}:...:\{Device N\} ``` -The definiation of each device is in the following format: +The definition of each device is in the following format: ``` \{Device UUID\},\{device split count\},\{device memory limit\},\{device core limit\},\{device type\},\{device numa\},\{healthy\} ``` diff --git a/versioned_docs/version-v2.6.0/developers/scheduling.md b/versioned_docs/version-v2.6.0/developers/scheduling.md index 8cedd26..cc87233 100644 --- a/versioned_docs/version-v2.6.0/developers/scheduling.md +++ b/versioned_docs/version-v2.6.0/developers/scheduling.md @@ -83,7 +83,7 @@ GPU spread, use different GPU cards when possible, egs: ### Node-scheduler-policy -![node-shceduler-policy-demo.png](../resources/node-shceduler-policy-demo.png) +![node-scheduler-policy-demo.png](../resources/node-scheduler-policy-demo.png) #### Binpack @@ -253,4 +253,4 @@ For example: If a Pod requests 3 GPUs, take **gpu0, gpu1, gpu2** as an example. (gpu1, gpu2, gpu3) totalScore: 200 + 100 + 200 = 500 ``` -Therefore, when a **Pod requests 3 GPUs**, we allocate **gpu1, gpu2, gpu3**. \ No newline at end of file +Therefore, when a **Pod requests 3 GPUs**, we allocate **gpu1, gpu2, gpu3**.
diff --git a/versioned_docs/version-v2.6.0/get-started/nginx-example.md b/versioned_docs/version-v2.6.0/get-started/nginx-example.md index af65307..2cbd118 100644 --- a/versioned_docs/version-v2.6.0/get-started/nginx-example.md +++ b/versioned_docs/version-v2.6.0/get-started/nginx-example.md @@ -138,7 +138,7 @@ spec: nvidia.com/gpumem: 10240 # Each vGPU contains 10240m device memory (Optional,Integer) ``` -#### Verify in container resouce control +#### Verify in-container resource control Execute the following query command: diff --git a/versioned_docs/version-v2.6.0/installation/how-to-use-volcano-vgpu.md b/versioned_docs/version-v2.6.0/installation/how-to-use-volcano-vgpu.md index bbc39b1..95bcc90 100644 --- a/versioned_docs/version-v2.6.0/installation/how-to-use-volcano-vgpu.md +++ b/versioned_docs/version-v2.6.0/installation/how-to-use-volcano-vgpu.md @@ -113,7 +113,7 @@ spec: resources: limits: volcano.sh/vgpu-number: 2 # requesting 2 gpu cards - volcano.sh/vgpu-memory: 3000 # (optinal)each vGPU uses 3G device memory + volcano.sh/vgpu-memory: 3000 # (optional) each vGPU uses 3G device memory volcano.sh/vgpu-cores: 50 # (optional) each vGPU uses 50% core EOF ``` diff --git a/versioned_docs/version-v2.6.0/installation/offline-installation.md b/versioned_docs/version-v2.6.0/installation/offline-installation.md index cb8d41e..bdd7f1f 100644 --- a/versioned_docs/version-v2.6.0/installation/offline-installation.md +++ b/versioned_docs/version-v2.6.0/installation/offline-installation.md @@ -21,8 +21,8 @@ Load the images, tag them with your internal registry, and push them to your reg docker load -i {HAMi_image}.tar docker tag projecthami/hami:{HAMi version} {your_inner_registry}/hami:{HAMi version} docker push {your_inner_registry}/hami:{HAMi version} -docker tag docker.io/jettech/kube-webhook-certgen:v1.5.2 {your inner_regisry}/kube-webhook-certgen:v1.5.2 -docker push {your inner_regisry}/kube-webhook-certgen:v1.5.2 +docker tag docker.io/jettech/kube-webhook-certgen:v1.5.2 {your_inner_registry}/kube-webhook-certgen:v1.5.2 +docker push {your_inner_registry}/kube-webhook-certgen:v1.5.2 docker tag liangjw/kube-webhook-certgen:v1.1.1 {your_inner_registry}/kube-webhook-certgen:v1.1.1 docker tag registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:{your kubernetes version} {your_inner_registry}/kube-scheduler:{your kubernetes version} docker push {your_inner_registry}/kube-scheduler:{your kubernetes version} @@ -31,7 +31,7 @@ docker push {your_inner_registry}/kube-scheduler:{your kubernetes version} ## Prepare HAMi chart Download the charts folder from [github](https://github.com/Project-HAMi/HAMi/tree/master/charts), -place it into $\{CHART_PATH\} inside cluser, then edit the following fields in $\{CHART_PATH\}/hami/values.yaml. +place it into $\{CHART_PATH\} inside the cluster, then edit the following fields in $\{CHART_PATH\}/hami/values.yaml. ```yaml scheduler: diff --git a/versioned_docs/version-v2.6.0/key-features/device-resource-isolation.md b/versioned_docs/version-v2.6.0/key-features/device-resource-isolation.md index 1b84235..39f323a 100644 --- a/versioned_docs/version-v2.6.0/key-features/device-resource-isolation.md +++ b/versioned_docs/version-v2.6.0/key-features/device-resource-isolation.md @@ -2,7 +2,7 @@ title: Device resource isolation --- -A simple demostration for device isolation: +A simple demonstration of device isolation: A task with the following resources.
``` diff --git a/versioned_docs/version-v2.6.0/releases.md b/versioned_docs/version-v2.6.0/releases.md index 150ef87..43b122a 100644 --- a/versioned_docs/version-v2.6.0/releases.md +++ b/versioned_docs/version-v2.6.0/releases.md @@ -79,7 +79,7 @@ Hence, if an issue is important it is important to advocate its priority early i \ No newline at end of file +``` --> diff --git a/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_ca.sh b/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_ca.sh index 519b735..7b3cf6d 100755 --- a/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_ca.sh +++ b/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_ca.sh @@ -1,6 +1,6 @@ #!/bin/bash -# genererate front-proxy-ca, server-ca +# generate front-proxy-ca, server-ca set -e set -o pipefail @@ -23,4 +23,4 @@ function main() { gen_front_proxy_ca } -main "$@" \ No newline at end of file +main "$@" diff --git a/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_etcd.sh b/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_etcd.sh index 7c82b46..7fcce7f 100755 --- a/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_etcd.sh +++ b/versioned_docs/version-v2.7.0/resources/installation/install-binary/generate_cert/generate_etcd.sh @@ -1,6 +1,6 @@ #!/bin/bash -# genererate CA & leaf certificates of etcd. +# generate CA & leaf certificates of etcd. set -e set -o pipefail @@ -45,4 +45,4 @@ function main() { generate_leaf_certs } -main "$@" \ No newline at end of file +main "$@" diff --git a/versioned_docs/version-v2.7.0/resources/node-shceduler-policy-demo.png b/versioned_docs/version-v2.7.0/resources/node-scheduler-policy-demo.png similarity index 100% rename from versioned_docs/version-v2.7.0/resources/node-shceduler-policy-demo.png rename to versioned_docs/version-v2.7.0/resources/node-scheduler-policy-demo.png diff --git a/versioned_docs/version-v2.7.0/userguide/Enflame-device/enable-enflame-gcu-sharing.md b/versioned_docs/version-v2.7.0/userguide/Enflame-device/enable-enflame-gcu-sharing.md index f6b680f..a8a3026 100644 --- a/versioned_docs/version-v2.7.0/userguide/Enflame-device/enable-enflame-gcu-sharing.md +++ b/versioned_docs/version-v2.7.0/userguide/Enflame-device/enable-enflame-gcu-sharing.md @@ -24,7 +24,7 @@ title: Enable Enflame GPU Sharing ## Enabling GCU-sharing Support -* Deploy gcushare-device-plugin on enflame nodes (Please consult your device provider to aquire its package and document) +* Deploy gcushare-device-plugin on enflame nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *Install only gcushare-device-plugin, don't install gpu-scheduler-plugin package.* @@ -122,4 +122,4 @@ Look for annotations containing device information in the node status. 2. Multiple GCU allocation in one container is not supported yet -3. `efsmi` inside container shows the total device memory, which is NOT a bug, device memory will be properly limited when running tasks. \ No newline at end of file +3. `efsmi` inside the container shows the total device memory, which is NOT a bug; device memory will be properly limited when running tasks.
diff --git a/versioned_docs/version-v2.7.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md b/versioned_docs/version-v2.7.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md index d2fc205..c0a950f 100644 --- a/versioned_docs/version-v2.7.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md +++ b/versioned_docs/version-v2.7.0/userguide/Hygon-device/enable-hygon-dcu-sharing.md @@ -78,4 +78,4 @@ Launch your DCU tasks like you usually do 1. DCU-sharing in init container is not supported, pods with "hygon.com/dcumem" in init container will never be scheduled. -2. Only one vdcu can be aquired per container. If you want to mount multiple dcu devices, then you shouldn't set `hygon.com/dcumem` or `hygon.com/dcucores` \ No newline at end of file +2. Only one vdcu can be acquired per container. If you want to mount multiple dcu devices, then you shouldn't set `hygon.com/dcumem` or `hygon.com/dcucores` diff --git a/versioned_docs/version-v2.7.0/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md b/versioned_docs/version-v2.7.0/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md index 11621e7..9771b6b 100644 --- a/versioned_docs/version-v2.7.0/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md +++ b/versioned_docs/version-v2.7.0/userguide/Iluvatar-device/enable-illuvatar-gpu-sharing.md @@ -24,7 +24,7 @@ title: Enable Illuvatar GPU Sharing ## Enabling GPU-sharing Support -* Deploy gpu-manager on iluvatar nodes (Please consult your device provider to aquire its package and document) +* Deploy gpu-manager on iluvatar nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *Install only gpu-manager, don't install gpu-admission package.* @@ -151,4 +151,4 @@ Look for annotations containing device information in the node status. 3. The `iluvatar.ai/vcuda-memory` resource is only effective when `iluvatar.ai/vgpu=1`. -4. Multi-device requests (`iluvatar.ai/vgpu > 1`) do not support vGPU mode. \ No newline at end of file +4. Multi-device requests (`iluvatar.ai/vgpu > 1`) do not support vGPU mode. diff --git a/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md b/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md index 2f4c29e..106097c 100644 --- a/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md +++ b/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/enable-metax-gpu-schedule.md @@ -40,7 +40,7 @@ the GPU device plugin (gpu-device) handles fine-grained allocation based on the ## Enabling topo-awareness scheduling -* Deploy Metax GPU Extensions on metax nodes (Please consult your device provider to aquire its package and document) +* Deploy Metax GPU Extensions on metax nodes (Please consult your device provider to acquire its package and document) * Deploy HAMi according to README.md diff --git a/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md b/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md index 14b6690..24b4964 100644 --- a/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md +++ b/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/examples/allocate-binpack.md @@ -2,7 +2,7 @@ title: Binpack schedule policy --- -To allocate metax device with mininum damage to topology, you need to only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy: "binpack"`. 
+To allocate a metax device with minimum damage to the topology, you only need to assign `metax-tech.com/gpu` with the annotation `hami.io/node-scheduler-policy: "binpack"`. ```yaml apiVersion: v1 diff --git a/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/specify-binpack-task.md b/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/specify-binpack-task.md index 3b18514..a5baa16 100644 --- a/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/specify-binpack-task.md +++ b/versioned_docs/version-v2.7.0/userguide/Metax-device/Metax-GPU/specify-binpack-task.md @@ -2,7 +2,7 @@ title: Binpack schedule policy -To allocate metax device with mininum damage to topology, you need to only assign `metax-tech.com/gpu` with annotations `hami.io/node-scheduler-policy: "binpack"`. +To allocate a metax device with minimum damage to the topology, you only need to assign `metax-tech.com/gpu` with the annotation `hami.io/node-scheduler-policy: "binpack"`. ```yaml metadata: diff --git a/versioned_docs/version-v2.7.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md b/versioned_docs/version-v2.7.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md index 162c7ea..abbe2ca 100644 --- a/versioned_docs/version-v2.7.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md +++ b/versioned_docs/version-v2.7.0/userguide/Mthreads-device/enable-mthreads-gpu-sharing.md @@ -29,7 +29,7 @@ title: Enable Mthreads GPU sharing ## Enabling GPU-sharing Support -* Deploy MT-CloudNative Toolkit on mthreads nodes (Please consult your device provider to aquire its package and document) +* Deploy MT-CloudNative Toolkit on mthreads nodes (Please consult your device provider to acquire its package and document) > **NOTICE:** *You can remove mt-mutating-webhook and mt-gpu-scheduler after installation (optional).* @@ -66,4 +66,4 @@ spec: > **NOTICE1:** *Each unit of sgpu-memory indicates 512M device memory* -> **NOTICE2:** *You can find more examples in [examples/mthreads folder](https://github.com/Project-HAMi/HAMi/tree/release-v2.6/examples/mthreads/)* \ No newline at end of file +> **NOTICE2:** *You can find more examples in [examples/mthreads folder](https://github.com/Project-HAMi/HAMi/tree/release-v2.6/examples/mthreads/)* diff --git a/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md b/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md index ce0bef0..397e984 100644 --- a/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md +++ b/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/examples/specify-card-type-to-use.md @@ -24,4 +24,4 @@ spec: nvidia.com/gpu: 2 # requesting 2 vGPUs ``` -> **NOTICE:** * You can assign this task to multiple GPU types, use comma to seperate,In this example, we want to run this job on A100 or V100* \ No newline at end of file +> **NOTICE:** *You can assign this task to multiple GPU types, use comma to separate. In this example, we want to run this job on A100 or V100* diff --git a/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/specify-device-type-to-use.md b/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/specify-device-type-to-use.md index 0ec756e..3d3eef6 100644 --- a/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/specify-device-type-to-use.md +++ b/versioned_docs/version-v2.7.0/userguide/NVIDIA-device/specify-device-type-to-use.md @@ -11,7 +11,7 @@ For example, a task with the following annotation will be assigned to A100 or V1
```yaml metadata: annotations: - nvidia.com/use-gputype: "A100,V100" # Specify the card type for this job, use comma to seperate, will not launch job on non-specified card + nvidia.com/use-gputype: "A100,V100" # Specify the card type for this job, use comma to separate, will not launch job on non-specified card ``` A task may use `nvidia.com/nouse-gputype` to evade certain types of GPU. In the following example, the job won't be assigned to a 1080 (including 1080Ti) or 2080 (including 2080Ti) type of card. @@ -19,5 +19,5 @@ A task may use `nvidia.com/nouse-gputype` to evade certain type of GPU. In this ```yaml metadata: annotations: - nvidia.com/nouse-gputype: "1080,2080" # Specify the blacklist card type for this job, use comma to seperate, will not launch job on specified card + nvidia.com/nouse-gputype: "1080,2080" # Specify the blacklist card type for this job, use comma to separate, will not launch job on specified card ``` diff --git a/versioned_docs/version-v2.7.0/userguide/monitoring/real-time-device-usage.md b/versioned_docs/version-v2.7.0/userguide/monitoring/real-time-device-usage.md index e0ee6a4..78e44d9 100644 --- a/versioned_docs/version-v2.7.0/userguide/monitoring/real-time-device-usage.md +++ b/versioned_docs/version-v2.7.0/userguide/monitoring/real-time-device-usage.md @@ -14,9 +14,9 @@ It contains the following metrics: | Metrics | Description | Example | |----------|-------------|---------| -| Device_memory_desc_of_container | Container device meory real-time usage | `{context="0",ctrname="2-1-3-pod-1",data="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",module="0",offset="0",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | +| Device_memory_desc_of_container | Container device memory real-time usage | `{context="0",ctrname="2-1-3-pod-1",data="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",module="0",offset="0",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | | Device_utilization_desc_of_container | Container device real-time utilization | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | | HostCoreUtilization | GPU real-time utilization on host | `{deviceidx="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",zone="vGPU"}` 0 | | HostGPUMemoryUsage | GPU real-time device memory usage on host | `{deviceidx="0",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",zone="vGPU"}` 2.87244288e+08 | | vGPU_device_memory_limit_in_bytes | device limit for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 2.62144e+09 | -| vGPU_device_memory_usage_in_bytes | device usage for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | \ No newline at end of file +| vGPU_device_memory_usage_in_bytes | device usage for a certain container | `{ctrname="2-1-3-pod-1",deviceuuid="GPU-00552014-5c87-89ac-b1a6-7b53aa24b0ec",podname="2-1-3-pod-1",podnamespace="default",vdeviceid="0",zone="vGPU"}` 0 | diff --git a/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md b/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md index 700597e..21feb72 100644 ---
a/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md +++ b/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/examples/use_exclusive_gpu.md @@ -4,7 +4,7 @@ title: Exclusive gpu usage ## Job description -To allocate an exlusive GPU, you need only assign `volcano.sh/vgpu-number` without any other `volcano.sh/xxx` fields, as the example below: +To allocate an exclusive GPU, you need only assign `volcano.sh/vgpu-number` without any other `volcano.sh/xxx` fields, as in the example below: ```yaml apiVersion: v1 diff --git a/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md b/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md index 036a9d6..1658ca2 100644 --- a/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md +++ b/versioned_docs/version-v2.7.0/userguide/volcano-vgpu/NVIDIA-GPU/how-to-use-volcano-vgpu.md @@ -113,7 +113,7 @@ spec: resources: limits: volcano.sh/vgpu-number: 2 # requesting 2 gpu cards - volcano.sh/vgpu-memory: 3000 # (optinal)each vGPU uses 3G device memory + volcano.sh/vgpu-memory: 3000 # (optional) each vGPU uses 3G device memory volcano.sh/vgpu-cores: 50 # (optional) each vGPU uses 50% core EOF ``` diff --git a/versioned_sidebars/version-v1.3.0-sidebars.json b/versioned_sidebars/version-v1.3.0-sidebars.json index 68e6aed..faf5728 100644 --- a/versioned_sidebars/version-v1.3.0-sidebars.json +++ b/versioned_sidebars/version-v1.3.0-sidebars.json @@ -203,7 +203,7 @@ "label": "Contributor Guide", "items": [ "contributor/contributing", - "contributor/goverance", + "contributor/governance", "contributor/ladder" ] }, diff --git a/versioned_sidebars/version-v2.4.1-sidebars.json b/versioned_sidebars/version-v2.4.1-sidebars.json index 68e6aed..faf5728 100644 --- a/versioned_sidebars/version-v2.4.1-sidebars.json +++ b/versioned_sidebars/version-v2.4.1-sidebars.json @@ -203,7 +203,7 @@ "label": "Contributor Guide", "items": [ "contributor/contributing", - "contributor/goverance", + "contributor/governance", "contributor/ladder" ] }, diff --git a/versioned_sidebars/version-v2.5.0-sidebars.json b/versioned_sidebars/version-v2.5.0-sidebars.json index e1db452..aefdae3 100644 --- a/versioned_sidebars/version-v2.5.0-sidebars.json +++ b/versioned_sidebars/version-v2.5.0-sidebars.json @@ -252,7 +252,7 @@ "label": "Contributor Guide", "items": [ "contributor/contributing", - "contributor/goverance", + "contributor/governance", "contributor/ladder" ] }, diff --git a/versioned_sidebars/version-v2.5.1-sidebars.json b/versioned_sidebars/version-v2.5.1-sidebars.json index 479d608..ccdd7a7 100644 --- a/versioned_sidebars/version-v2.5.1-sidebars.json +++ b/versioned_sidebars/version-v2.5.1-sidebars.json @@ -204,7 +204,7 @@ "label": "Contributor Guide", "items": [ "contributor/contributing", - "contributor/goverance", + "contributor/governance", "contributor/ladder" ] }, diff --git a/versioned_sidebars/version-v2.6.0-sidebars.json b/versioned_sidebars/version-v2.6.0-sidebars.json index dd20881..e8ef101 100644 --- a/versioned_sidebars/version-v2.6.0-sidebars.json +++ b/versioned_sidebars/version-v2.6.0-sidebars.json @@ -246,7 +246,7 @@ "label": "Contributor Guide", "items": [ "contributor/contributing", - "contributor/goverance", + "contributor/governance", "contributor/ladder" ] }, diff --git a/versioned_sidebars/version-v2.7.0-sidebars.json b/versioned_sidebars/version-v2.7.0-sidebars.json index 388dfc0..fafc527 100644 ---
a/versioned_sidebars/version-v2.7.0-sidebars.json +++ b/versioned_sidebars/version-v2.7.0-sidebars.json @@ -283,7 +283,7 @@ "label": "Contributor Guide", "items": [ "contributor/contributing", - "contributor/goverance", + "contributor/governance", "contributor/ladder" ] },