diff --git a/buildkit/build_llb/build_graph.go b/buildkit/build_llb/build_graph.go index 45f5ea3b..7993be82 100644 --- a/buildkit/build_llb/build_graph.go +++ b/buildkit/build_llb/build_graph.go @@ -1,3 +1,5 @@ +// Converts the internal build plan graph to a BuildKit LLB + package build_llb import ( @@ -96,7 +98,7 @@ func NewBuildGraph(plan *plan.BuildPlan, localState *llb.State, cacheStore *Buil return g, nil } -// GenerateLLB generates the LLB state for the build graph +// generate the LLB state for the build graph func (g *BuildGraph) GenerateLLB() (*BuildGraphOutput, error) { // Get processing order using topological sort order, err := g.graph.ComputeProcessingOrder() @@ -180,7 +182,7 @@ func (g *BuildGraph) processNode(node *StepNode) error { return nil } -// convertNodeToLLB converts a step node to an LLB state +// converts a step node to an LLB state func (g *BuildGraph) convertNodeToLLB(node *StepNode) (*llb.State, error) { state, err := g.getNodeStartingState(node) if err != nil { @@ -391,7 +393,7 @@ func (g *BuildGraph) getSecretInvalidationMountOptions(node *StepNode, secretOpt return opts } -// getCacheMountOptions returns the llb.RunOption slice for the given cache keys +// returns the llb.RunOption slice for the given cache keys func (g *BuildGraph) getCacheMountOptions(cacheKeys []string) ([]llb.RunOption, error) { var opts []llb.RunOption diff --git a/cli/common.go b/cli/common.go index b31abf1b..bc5a8d84 100644 --- a/cli/common.go +++ b/cli/common.go @@ -88,6 +88,7 @@ func GenerateBuildResultForCommand(cmd *cli.Command) (*core.BuildResult, *a.App, return buildResult, app, env, nil } +// add $schema link to resulting map JSON for improved IDE experience when manually editing func addSchemaToPlanMap(p *plan.BuildPlan) (map[string]any, error) { if p == nil { return map[string]any{"$schema": config.SchemaUrl}, nil diff --git a/core/__snapshots__/TestGenerateBuildPlanForExamples_node-npm-install-in-build_1.snap.json 
b/core/__snapshots__/TestGenerateBuildPlanForExamples_node-npm-install-in-build_1.snap.json new file mode 100755 index 00000000..7434f129 --- /dev/null +++ b/core/__snapshots__/TestGenerateBuildPlanForExamples_node-npm-install-in-build_1.snap.json @@ -0,0 +1,155 @@ +{ + "caches": { + "node-modules": { + "directory": "/app/node_modules/.cache", + "type": "shared" + }, + "npm-install": { + "directory": "/root/.npm", + "type": "shared" + } + }, + "deploy": { + "base": { + "image": "ghcr.io/railwayapp/railpack-runtime:latest" + }, + "inputs": [ + { + "include": [ + "/mise/shims", + "/mise/installs", + "/usr/local/bin/mise", + "/etc/mise/config.toml", + "/root/.local/state/mise" + ], + "step": "packages:mise" + }, + { + "include": [ + "/app/node_modules" + ], + "step": "build" + }, + { + "exclude": [ + "node_modules", + ".yarn" + ], + "include": [ + "/root/.cache", + "." + ], + "step": "build" + }, + { + "include": [ + "." + ], + "step": "build" + } + ], + "startCommand": "npm run start", + "variables": { + "CI": "true", + "NODE_ENV": "production", + "NPM_CONFIG_FUND": "false", + "NPM_CONFIG_PRODUCTION": "false", + "NPM_CONFIG_UPDATE_NOTIFIER": "false" + } + }, + "steps": [ + { + "assets": { + "mise.toml": "[mise.toml]" + }, + "commands": [ + { + "path": "/mise/shims" + }, + { + "customName": "create mise config", + "name": "mise.toml", + "path": "/etc/mise/config.toml" + }, + { + "cmd": "sh -c 'mise trust -a \u0026\u0026 mise install'", + "customName": "install mise packages: node" + } + ], + "inputs": [ + { + "image": "ghcr.io/railwayapp/railpack-builder:latest" + } + ], + "name": "packages:mise", + "variables": { + "MISE_CACHE_DIR": "/mise/cache", + "MISE_CONFIG_DIR": "/mise", + "MISE_DATA_DIR": "/mise", + "MISE_INSTALLS_DIR": "/mise/installs", + "MISE_NODE_VERIFY": "false", + "MISE_SHIMS_DIR": "/mise/shims" + } + }, + { + "caches": [ + "npm-install" + ], + "commands": [ + { + "path": "/app/node_modules/.bin" + }, + { + "cmd": "mkdir -p /app/node_modules/.cache" + }, 
+ { + "dest": "package.json", + "src": "package.json" + }, + { + "dest": "package-lock.json", + "src": "package-lock.json" + }, + { + "cmd": "npm ci" + } + ], + "inputs": [ + { + "step": "packages:mise" + } + ], + "name": "install", + "variables": { + "CI": "true", + "NODE_ENV": "production", + "NPM_CONFIG_FUND": "false", + "NPM_CONFIG_PRODUCTION": "false", + "NPM_CONFIG_UPDATE_NOTIFIER": "false" + } + }, + { + "commands": [ + { + "cmd": "sh -c 'npm ci'", + "customName": "npm ci" + } + ], + "inputs": [ + { + "step": "install" + }, + { + "include": [ + "." + ], + "local": true + } + ], + "name": "build", + "secrets": [ + "*" + ] + } + ] +} \ No newline at end of file diff --git a/core/cleanse.go b/core/cleanse.go new file mode 100644 index 00000000..1bc75044 --- /dev/null +++ b/core/cleanse.go @@ -0,0 +1,77 @@ +package core + +import ( + "regexp" + + "github.com/railwayapp/railpack/core/logger" + "github.com/railwayapp/railpack/core/plan" + "github.com/railwayapp/railpack/core/providers/node" +) + +// Regexes for matching commands that intentionally remove node_modules or perform +// clean installs (which implicitly delete the directory) so we can avoid mounting +// the node_modules cache in those steps. +var ( + // Matches "npm ci" with flexible whitespace, using word boundaries + npmCiCommandRegex = regexp.MustCompile(`(?i)\bnpm\s+ci\b`) + + // Matches common delete commands targeting node_modules + removeNodeModulesRegex = regexp.MustCompile(`(?i)\b(?:rm\s+-r[f]?|rmdir|rimraf)\s+(?:\S*\/)?node_modules\b`) +) + +// willRemoveNodeModules determines if any command in the provided slice removes +// the node_modules directory either directly (rm/rimraf) or indirectly (npm ci). 
+// this is brittle & imperfect: https://github.com/railwayapp/railpack/pull/259 +func willRemoveNodeModules(commands []plan.Command) bool { + for _, cmd := range commands { + if execCmd, ok := cmd.(plan.ExecCommand); ok { + if npmCiCommandRegex.MatchString(execCmd.Cmd) || removeNodeModulesRegex.MatchString(execCmd.Cmd) { + return true + } + } + } + return false +} + +// cleansePlanStructure applies mutations to the build plan structure after it +// is generated but before validation / serialization. Today this focuses on +// detaching the node_modules cache from steps that explicitly remove +// node_modules so the global cache isn't invalidated unintentionally. +func cleansePlanStructure(buildPlan *plan.BuildPlan, logger *logger.Logger) { + // let's get the cache key name that has a Directory of NODE_MODULES_CACHE + var nodeModulesCacheKey string + for cacheName, cacheDef := range buildPlan.Caches { + if cacheDef.Directory == node.NODE_MODULES_CACHE { + nodeModulesCacheKey = cacheName + break + } + } + + if nodeModulesCacheKey == "" { + // no node_modules cache defined, nothing to do + return + } + + // Only detach the node modules cache from steps that remove node_modules themselves. + // Keep the global cache definition so earlier steps (like install) can still mount it. 
+ for i, step := range buildPlan.Steps { + if step.Name == "install" || !willRemoveNodeModules(step.Commands) { + continue + } + + before := len(step.Caches) + if before == 0 { + continue + } + + // It's important that we do not result in an array with a zeroed string, which is why we are using this ugly loop + var newCaches []string + for _, name := range step.Caches { + if name != "" && name != nodeModulesCacheKey { + newCaches = append(newCaches, name) + } + } + + buildPlan.Steps[i].Caches = newCaches + } +} diff --git a/core/cleanse_test.go b/core/cleanse_test.go new file mode 100644 index 00000000..2076501c --- /dev/null +++ b/core/cleanse_test.go @@ -0,0 +1,61 @@ +package core + +import ( + "reflect" + "testing" + + "github.com/railwayapp/railpack/core/logger" + "github.com/railwayapp/railpack/core/plan" + "github.com/railwayapp/railpack/core/providers/node" +) + +func newTestLogger() *logger.Logger { return logger.NewLogger() } + +// helper to create a basic build plan with a node_modules cache (when withCache true) +func buildPlan(withCache bool) *plan.BuildPlan { + p := plan.NewBuildPlan() + if withCache { + p.Caches["node_modules"] = &plan.Cache{Directory: node.NODE_MODULES_CACHE, Type: plan.CacheTypeShared} + } + return p +} + +func TestCleanse_CachePresent_StepDoesNotRemoveNodeModules(t *testing.T) { + p := buildPlan(true) + step := plan.Step{Name: "build", Caches: []string{"node_modules"}} + step.Commands = []plan.Command{plan.NewExecShellCommand("echo 'nothing to see'")} + p.Steps = append(p.Steps, step) + + cleansePlanStructure(p, newTestLogger()) + + // should remain mounted + if !reflect.DeepEqual(p.Steps[0].Caches, []string{"node_modules"}) { + t.Fatalf("expected cache to remain since step doesn't remove node_modules, got %#v", p.Steps[0].Caches) + } +} + +func TestCleanse_CachePresent_StepRemovesNodeModules(t *testing.T) { + p := buildPlan(true) + step := plan.Step{Name: "build", Caches: []string{"node_modules"}} + step.Commands = 
[]plan.Command{plan.NewExecShellCommand("rm -rf node_modules && echo done")} + p.Steps = append(p.Steps, step) + + cleansePlanStructure(p, newTestLogger()) + + if len(p.Steps[0].Caches) != 0 { // should be removed (allow nil or empty) + t.Fatalf("expected cache to be removed (nil or empty), got %#v", p.Steps[0].Caches) + } +} + +func TestCleanse_InstallStepAlwaysKeepsCache(t *testing.T) { + p := buildPlan(true) + install := plan.Step{Name: "install", Caches: []string{"node_modules"}} + install.Commands = []plan.Command{plan.NewExecShellCommand("npm ci")} + p.Steps = append(p.Steps, install) + + cleansePlanStructure(p, newTestLogger()) + + if !reflect.DeepEqual(p.Steps[0].Caches, []string{"node_modules"}) { // should remain even though npm ci matches removal heuristic + t.Fatalf("expected install step cache to remain, got %#v", p.Steps[0].Caches) + } +} diff --git a/core/core.go b/core/core.go index 966f6775..470ac2f6 100644 --- a/core/core.go +++ b/core/core.go @@ -109,12 +109,16 @@ func GenerateBuildPlan(app *app.App, env *app.Environment, options *GenerateBuil return &BuildResult{Success: false, Logs: logger.Logs} } + // before `Generate()` any commands provided by railpack.json are *not* merged into the provider-generated + // buildPlan. 
This means providers can't view any of the custom structure provided by the user via a railpack.json buildPlan, resolvedPackages, err := ctx.Generate() if err != nil { logger.LogError("%s", err.Error()) return &BuildResult{Success: false, Logs: logger.Logs} } + cleansePlanStructure(buildPlan, logger) + if !ValidatePlan(buildPlan, app, logger, &ValidatePlanOptions{ ErrorMissingStartCommand: options.ErrorMissingStartCommand, ProviderToUse: providerToUse, @@ -135,6 +139,8 @@ func GenerateBuildPlan(app *app.App, env *app.Environment, options *GenerateBuil return buildResult } +// cleansing logic moved to cleanse.go + // GetConfig merges the options, environment, and file config into a single config func GetConfig(app *app.App, env *app.Environment, options *GenerateBuildPlanOptions, logger *logger.Logger) (*c.Config, error) { optionsConfig := GenerateConfigFromOptions(options) @@ -151,7 +157,6 @@ func GetConfig(app *app.App, env *app.Environment, options *GenerateBuildPlanOpt return mergedConfig, nil } -// GenerateConfigFromFile generates a config from the config file func GenerateConfigFromFile(app *app.App, env *app.Environment, options *GenerateBuildPlanOptions, logger *logger.Logger) (*c.Config, error) { config := c.EmptyConfig() @@ -233,7 +238,7 @@ func GenerateConfigFromEnvironment(env *app.Environment) *c.Config { return config } -// GenerateConfigFromOptions generates a config from the CLI options +// generates a config from the CLI options func GenerateConfigFromOptions(options *GenerateBuildPlanOptions) *c.Config { config := c.EmptyConfig() diff --git a/core/plan/cache.go b/core/plan/cache.go index d8f0bf8a..96e852c6 100644 --- a/core/plan/cache.go +++ b/core/plan/cache.go @@ -6,11 +6,8 @@ const ( ) type Cache struct { - // The directory to cache Directory string `json:"directory,omitempty" jsonschema:"description=The directory to cache"` - - // The type of cache (either "shared" or "locked") - Type string `json:"type,omitempty" 
jsonschema:"enum=shared,enum=locked,default=shared,description=The type of cache (either 'shared' or 'locked')"` + Type string `json:"type,omitempty" jsonschema:"enum=shared,enum=locked,default=shared,description=The type of cache (either 'shared' or 'locked')"` } func NewCache(directory string) *Cache { diff --git a/core/providers/node/node.go b/core/providers/node/node.go index 1de3b6d5..fdc1b003 100644 --- a/core/providers/node/node.go +++ b/core/providers/node/node.go @@ -18,7 +18,9 @@ const ( DEFAULT_NODE_VERSION = "22" DEFAULT_BUN_VERSION = "latest" - COREPACK_HOME = "/opt/corepack" + COREPACK_HOME = "/opt/corepack" + + // not used by npm, but many other tools: next, jest, webpack, etc NODE_MODULES_CACHE = "/app/node_modules/.cache" ) @@ -129,6 +131,7 @@ func (p *NodeProvider) Plan(ctx *generate.GenerateContext) error { buildLayer := plan.NewStepLayer(build.Name(), plan.Filter{ Include: buildIncludeDirs, + // TODO we should just have a default dockerignore/exclusion list instead of hardcoding here Exclude: []string{"node_modules", ".yarn"}, }) @@ -184,7 +187,7 @@ func (p *NodeProvider) Build(ctx *generate.GenerateContext, build *generate.Comm } } - p.addCaches(ctx, build) + p.addCachesToBuildStep(ctx, build) } func (p *NodeProvider) addFrameworkCaches(ctx *generate.GenerateContext, build *generate.CommandStepBuilder, frameworkName string, frameworkCheck func(*WorkspacePackage, *generate.GenerateContext) bool, cacheSubPath string) { @@ -202,8 +205,10 @@ func (p *NodeProvider) addFrameworkCaches(ctx *generate.GenerateContext, build * } } -func (p *NodeProvider) addCaches(ctx *generate.GenerateContext, build *generate.CommandStepBuilder) { - build.AddCache(ctx.Caches.AddCache("node-modules", "/app/node_modules/.cache")) +// cache directories to add to the build step: if lock files are unchanged, these are pulled from cache, but cannot +// be removed in future steps. 
+func (p *NodeProvider) addCachesToBuildStep(ctx *generate.GenerateContext, build *generate.CommandStepBuilder) { + build.AddCache(ctx.Caches.AddCache("node-modules", NODE_MODULES_CACHE)) p.addFrameworkCaches(ctx, build, "next", func(pkg *WorkspacePackage, ctx *generate.GenerateContext) bool { if pkg.PackageJson.HasScript("build") { @@ -246,6 +251,7 @@ func (p *NodeProvider) InstallNodeDeps(ctx *generate.GenerateContext, install *g install.UseSecretsWithPrefixes([]string{"NODE", "NPM", "BUN", "PNPM", "YARN", "CI"}) install.AddPaths([]string{"/app/node_modules/.bin"}) + // TODO once dockerignore is in place, we should remove this if ctx.App.HasMatch("node_modules") { ctx.Logger.LogWarn("node_modules directory found in project root, this is likely a mistake") ctx.Logger.LogWarn("It is recommended to add node_modules to the .gitignore file") diff --git a/core/providers/procfile/procfile.go b/core/providers/procfile/procfile.go index c359ac0d..4e019010 100644 --- a/core/providers/procfile/procfile.go +++ b/core/providers/procfile/procfile.go @@ -1,3 +1,4 @@ +// this provider is unique: it is used solely to extract a start command package procfile import "github.com/railwayapp/railpack/core/generate" diff --git a/docs/src/content/docs/config/file.mdx b/docs/src/content/docs/config/file.mdx index 12d1bf43..86cc661b 100644 --- a/docs/src/content/docs/config/file.mdx +++ b/docs/src/content/docs/config/file.mdx @@ -173,7 +173,8 @@ For example: Caches are used to speed up builds by storing and reusing files between builds. Each cache has a type and a directory. Caches **are not persisted** in the final -image. +image. Cache folders cannot be removed by build scripts once defined (you'll receive +an `EBUSY: resource busy or locked` error if you try). The cache name is referenced in the `caches` field of a step. 
A cache has the following properties: diff --git a/docs/src/content/docs/guides/developing-locally.md b/docs/src/content/docs/guides/developing-locally.md index 5814c7c2..d6c9b892 100644 --- a/docs/src/content/docs/guides/developing-locally.md +++ b/docs/src/content/docs/guides/developing-locally.md @@ -120,6 +120,22 @@ mise tool poetry Here's some helpful debugging tricks: * `URFAVE_CLI_TRACING=on` for debugging CLI argument parsing -* `mise run cli --verbose build --show-plan --progress plain examples/node-bun` +* `mise run cli -- --verbose build --show-plan --progress plain examples/node-bun` * `mise run build`, add `./bin/` to your `$PATH`, and then run `railpack` in a separate local directory -* `NO_COLOR=1` \ No newline at end of file +* `NO_COLOR=1` + +### Interactive Debugging with Delve + +```sh +mise run debug-cli build $(pwd) +``` + +Then, set some breakpoints: + +``` +break core/providers/node/node.go:177 +continue +``` + +The commands you probably want: `ls`, `print build.Commands`, `continue`, `next`, `locals`. + diff --git a/examples/node-npm-install-in-build/index.js b/examples/node-npm-install-in-build/index.js new file mode 100644 index 00000000..47b16db1 --- /dev/null +++ b/examples/node-npm-install-in-build/index.js @@ -0,0 +1 @@ +console.log("hello from Node " + process.version); diff --git a/examples/node-npm-install-in-build/package-lock.json b/examples/node-npm-install-in-build/package-lock.json new file mode 100644 index 00000000..1bf0ce8c --- /dev/null +++ b/examples/node-npm-install-in-build/package-lock.json @@ -0,0 +1,15 @@ +{ + "name": "node-npm", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "node-npm", + "version": "1.0.0", + "engines": { + "node": "23.5.0" + } + } + } +} diff --git a/examples/node-npm-install-in-build/package.json b/examples/node-npm-install-in-build/package.json new file mode 100644 index 00000000..61505021 --- /dev/null +++ 
b/examples/node-npm-install-in-build/package.json @@ -0,0 +1,12 @@ +{ + "name": "node-npm", + "version": "1.0.0", + "main": "index.js", + "scripts": { + "start": "node index.js", + "test": "echo \"Error: no test specified\" && exit 1" + }, + "engines": { + "node": "23.5.0" + } +} diff --git a/examples/node-npm-install-in-build/railpack.json b/examples/node-npm-install-in-build/railpack.json new file mode 100644 index 00000000..164243b4 --- /dev/null +++ b/examples/node-npm-install-in-build/railpack.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://schema.railpack.com", + "steps": { + "build": { + // npm ci is generally run in the install phase, running it during build causes + // node_modules to be removed, which causes a buildkit error if node_modules/.cache + // is included as a cache folder. + "commands": ["...", "npm ci"] + } + } +} diff --git a/examples/node-npm-install-in-build/test.json b/examples/node-npm-install-in-build/test.json new file mode 100644 index 00000000..f089f738 --- /dev/null +++ b/examples/node-npm-install-in-build/test.json @@ -0,0 +1,5 @@ +[ + { + "expectedOutput": "hello from Node v23.5.0" + } +] diff --git a/mise.toml b/mise.toml index 8a9257c7..964b02ab 100644 --- a/mise.toml +++ b/mise.toml @@ -12,6 +12,9 @@ run = [ [tasks.cli] run = "go run cmd/cli/main.go" +[tasks.debug-cli] +run = "dlv debug github.com/railwayapp/railpack/cmd/cli --" + [tasks.build] run = "go build -o bin/railpack cmd/cli/main.go" @@ -51,6 +54,7 @@ golangci-lint = "1.63.4" bun = "1.2" node = "22" "go:github.com/wagoodman/dive" = "latest" +"go:github.com/go-delve/delve/cmd/dlv" = "latest" [settings] experimental = true