diff --git a/Cargo.lock b/Cargo.lock index 349ffc1..0b8d2bc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1285,6 +1285,12 @@ dependencies = [ "syn", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.178" @@ -1392,6 +1398,15 @@ dependencies = [ "xml5ever", ] +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1514,6 +1529,15 @@ dependencies = [ "nom 8.0.0", ] +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -2036,8 +2060,10 @@ dependencies = [ [[package]] name = "rigscribe" -version = "0.1.0" +version = "0.1.4" dependencies = [ + "async-stream", + "futures", "rig-core", "schemars", "serde", @@ -2046,6 +2072,9 @@ dependencies = [ "termimad", "thiserror", "tokio", + "tracing", + "tracing-appender", + "tracing-subscriber", ] [[package]] @@ -2339,6 +2368,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -2618,6 +2656,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.44" @@ -2799,15 +2846,27 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-appender" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" +dependencies = [ + "crossbeam-channel", + "thiserror", + "time", + "tracing-subscriber", +] + [[package]] name = "tracing-attributes" version = "0.1.31" @@ -2821,11 +2880,12 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.35" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", + "valuable", ] [[package]] @@ -2840,6 +2900,35 @@ dependencies = [ "tracing", ] +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -2933,6 +3022,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index cfd2897..ad0c1ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,8 @@ license = "MIT" repository = "https://github.com/hghalebi/rigscribe" [dependencies] +async-stream = "0.3.6" +futures = "0.3.31" rig-core = {version="0.26.0",features=["all"]} schemars ={version= "1.1.0"} @@ -16,3 +18,6 @@ serpscraper = "0.1.3" termimad = "0.34.1" thiserror = "2.0.17" tokio = {version= "1.48.0",features=["full"]} +tracing = "0.1.44" +tracing-appender = "0.2.4" +tracing-subscriber = { version = "0.3.22", features = ["env-filter", "fmt"] } diff --git a/STREAMING_DEBUG_REPORT.md b/STREAMING_DEBUG_REPORT.md new file mode 100644 index 0000000..e4cb37c --- /dev/null +++ b/STREAMING_DEBUG_REPORT.md @@ -0,0 +1,181 @@ +# Debugging Report & Streaming Guide + +## Debugging Summary + +During the review of the `rigscribe` repository, several issues were identified and resolved to ensure the project compiles and runs correctly. + +### 1. Missing Dependency (`async-stream`) +* **Issue:** The code in `src/agents/optimizer.rs` utilized the `async_stream::stream!` macro, but the `async-stream` crate was not declared in `Cargo.toml`. +* **Fix:** Added `async-stream = "0.3.6"` to `Cargo.toml`. + +### 2. Invalid Trait Syntax (`Example` Trait) +* **Issue:** The code contained invalid type definitions like `::Result`. The trait `Example` did not exist and seemed to be a placeholder or syntax error. +* **Fix:** Updated the `StreamingResult` type alias and function signatures to use standard Rust `Result` types: + ```rust + type StreamingResult = Pin> + Send>>; + ``` + +### 3. Type Mismatch with `stream_to_stdout` +* **Issue:** The `rig` library's helper function `stream_to_stdout` expected a specific stream type that didn't match the custom `StreamingResult` returned by `multi_turn_prompt`. Additionally, the original code wasn't capturing the final output string needed to create the `Artifact`. +* **Fix:** Replaced the library call with a manual consumption loop. This allows us to both print to stdout in real-time *and* build the final string: + ```rust + while let Some(res) = stream.next().await { + match res { + Ok(text) => { + print!("{}", text.text); // Stream to console + optimized_prompt.push_str(&text.text); // Capture for logic + } + // ... error handling + } + } + ``` + +### 4. Code Cleanup +* **Issue:** Several unused imports (`schemars`, `serde`, various `rig` items) and unused functions (`custom_stream_to_stdout`, `map_provider_error`) were causing compiler warnings. +* **Fix:** Removed these unused elements to achieve a clean compilation. + +--- + +## Understanding Streaming in Rust + +Streaming is a powerful concept in Rust, especially for AI applications where responses are generated token-by-token. + +### What is a Stream? + +In synchronous Rust, we have `Iterator`, which yields a sequence of values: +```rust +// Sync +let iter = vec![1, 2, 3].into_iter(); +for item in iter { ... } +``` + +In asynchronous Rust, a **`Stream`** is the async equivalent of an Iterator. Instead of returning the next item immediately, it returns a `Future` that resolves to the next item. + +### The `async-stream` Crate + +Creating a Stream manually involves implementing the `Stream` trait, which can be complex (handling `Poll`, `Context`, and `Pin`). + +The `async-stream` crate simplifies this by allowing you to write streams using generator syntax, similar to Python. It provides the `stream!` macro and the `yield` keyword. + +```rust +// Example from your code +async_stream::stream! { + // ... do some work ... + yield Ok(Text { text: "Hello" }); + // ... await something ... + yield Ok(Text { text: " World" }); +} +``` + +### The Return Type: `Pin>` + +You will often see this return type: +```rust +type StreamingResult = Pin> + Send>>; +``` + +1. **`dyn Stream`**: We are returning a "Trait Object". We don't care about the specific concrete type of the stream, just that it implements `Stream`. +2. **`Box`**: Because `dyn Stream` has an unknown size at compile time, we put it on the heap. +3. **`Pin`**: Async blocks and streams often contain self-referential references. `Pin` ensures the data isn't moved in memory, which is required for safety when polling futures. +4. **`Send`**: Ensures the stream can be sent across threads (required by most async runtimes like Tokio). + +### Consuming a Stream + +To use a stream, you typically use a `while let` loop with `.next().await`: + +```rust +use futures::StreamExt; // Required for .next() + +let mut stream = my_async_stream(); + +while let Some(item) = stream.next().await { + match item { + Ok(content) => println!("Received: {}", content), + Err(e) => eprintln!("Error: {}", e), + } +} +``` + +This pattern allows your application to remain responsive. In `rigscribe`, this is used to show the AI's "thought process" or generated text to the user immediately, rather than waiting for the entire generation to finish. + +--- + +## Deep Dive: Why `async-stream` and `Pin`? + +While the sections above cover *how* to use them, this section explains *why* they exist and what problems they solve. + +### 1. `async-stream`: The Magic of Generators + +Rust does not yet have native syntax for `yield` in functions (often called Generators or Coroutines) in the stable channel. + +Without `async-stream`, if you wanted to create a stream that emits values over time, you would have to manually implement the `Stream` trait. This requires building a state machine by hand: + +**The Hard Way (Manual State Machine):** +```rust +struct MyStream { + state: State, + count: usize, +} +enum State { Start, Waiting, Done } + +impl Stream for MyStream { + type Item = i32; + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + match self.state { + State::Start => { + // Return a value and switch state + self.state = State::Waiting; + Poll::Ready(Some(1)) + } + State::Waiting => { + // Check if async task is done... + // ... complex polling logic ... + Poll::Pending + } + State::Done => Poll::Ready(None), + } + } +} +``` + +**The Easy Way (`async-stream`):** +The `async_stream::stream!` macro essentially compiles your code block into an anonymous struct that implements `Stream` (like the manual example above) but manages the state machine for you automatically. + +```rust +stream! { + yield 1; // The macro handles saving state here + some_async_work().await; // And restoring it here + yield 2; +} +``` + +### 2. `Pin`: Solving the "Self-Referential" Problem + +To understand `Pin`, you must understand **Moves** and **Self-References**. + +1. **Moves:** In Rust, values are "moved" (copied to a new memory address) frequently (e.g., passing by value, resizing a `Vec`). Usually, this is fine because types like `i32` or `String` don't care where they live in memory. +2. **Self-References:** `Future`s generated by `async` blocks are different. They often store pointers to their *own* internal variables. + +**Example:** +Imagine an async block: +```rust +async { + let x = [0; 1024]; // A large array + let y = &x; // A reference to x (internal pointer!) + some_await().await; // Yield execution + println!("{:?}", y); // Use y when we wake up +} +``` +When this async block compiles, it becomes a struct. `y` is a pointer pointing to `x` *inside the same struct*. +If we **Move** this struct to a new memory location (e.g., `Box`ing it or passing it to a function), `x` moves to the new address, but `y` **still points to the old address**. This is a dangling pointer, which causes undefined behavior (crashes). + +**The Solution:** +`Pin

` is a wrapper around a pointer `P` (like `Box` or `&mut T`). It effectively says: +> "The value pointed to by `P` will **never move** in memory again until it is dropped." + +By returning `Pin>`, we are promising the compiler: +1. We put the Stream on the heap (`Box`). +2. We `Pin`ned it there. +3. Therefore, it is safe to poll it, even if it has internal self-references (which `async` blocks almost always do). + +This is why you almost always see `Pin` when dealing with manually managed Futures or Streams. \ No newline at end of file diff --git a/logs/rigscribe.log.2025-12-30 b/logs/rigscribe.log.2025-12-30 new file mode 100644 index 0000000..3fa57f9 --- /dev/null +++ b/logs/rigscribe.log.2025-12-30 @@ -0,0 +1,74 @@ +2025-12-30T22:53:59.115485Z INFO ThreadId(01) rigscribe: src/main.rs:18: Starting prompt optimization process for: 'write a python fonction' +2025-12-30T22:53:59.115796Z INFO ThreadId(01) rigscribe: src/lib.rs:47: Cache MIS: "./.prompts_perssitense_cache/201.json" +2025-12-30T22:53:59.115817Z INFO ThreadId(01) rigscribe: src/lib.rs:48: Optimizing ... +2025-12-30T22:53:59.123017Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:37: Starting optimization streaming... +2025-12-30T22:54:11.985895Z INFO ThreadId(17) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:32: [Tool Calling]-> Deconstructor! +2025-12-30T22:54:22.454142Z DEBUG ThreadId(12) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:64: Deconstructor extracted spec: Specification { goal: "Produce a working Python function.", constraints: "* The function's purpose, inputs, and outputs are undefined.\n* No requirements regarding performance, error handling, or dependencies.\n* The target Python version is not specified.\n* High risk of failure: The delivered function will not meet the user's actual, unstated needs.\n* Wasted effort: Development time will be spent on a solution that is functionally incorrect for the business problem.\n* Scope creep: Lack of initial definition will lead to iterative, inefficient rework cycles." } +2025-12-30T22:54:22.454307Z DEBUG ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="Deconstructor" args="{\"text\":\"write a python fonction\"}" result="{\"goal\":\"Produce a working Python function.\",\"constraints\":\"* The function's purpose, inputs, and outputs are undefined.\\n* No requirements regarding performance, error handling, or dependencies.\\n* The target Python version is not specified.\\n* High risk of failure: The delivered function will not meet the user's actual, unstated needs.\\n* Wasted effort: Development time will be spent on a solution that is functionally incorrect for the business problem.\\n* Scope creep: Lack of initial definition will lead to iterative, inefficient rework cycles.\"}" +2025-12-30T22:54:24.517949Z INFO ThreadId(12) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:40: [Tool Calling]-> PromptReviewer! +2025-12-30T22:55:02.276017Z DEBUG ThreadId(13) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:75: PromptReviewer produced artifact: Artifact { system_prompt: "## Persona\n\nYou are an expert Senior Python Developer. Your code is clean, efficient, well-documented, and production-ready. You adhere strictly to modern best practices, including type hinting, error handling, and style guidelines. You are relentlessly logical and will refuse to proceed if a request is ambiguous or incomplete.\n\n## Primary Task\n\nYour primary task is to write a single, production-quality Python function based on the detailed specifications provided within the `### FUNCTION SPECIFICATION ###` block.\n\n## Process\n\n1. **Analyze the Specification:** Scrutinize the user-provided specification for any ambiguity, logical gaps, or missing edge cases.\n2. **Clarify Ambiguities:** If the specification is incomplete or unclear in any way, you MUST ask clarifying questions before writing any code. State exactly what information is missing. DO NOT make assumptions.\n3. **Write the Function:** Once the specification is crystal clear, write the Python function.\n4. **Format the Output:** Present your final response as a single, complete Python code block. The code block MUST include the function, its docstring, and at least two distinct usage examples that demonstrate its correctness, including handling of edge cases.\n\n## Core Requirements & Constraints\n\n### General\n* **Language:** The code MUST be written for Python 3.9+.\n* **Dependencies:** You MUST NOT use any external libraries unless they are explicitly listed in the `Allowed Dependencies` section of the specification. Use only the Python standard library.\n* **Idempotency:** The function should be idempotent where the logic allows.\n\n### Code Quality & Style\n* **PEP 8:** The code MUST strictly adhere to the PEP 8 style guide.\n* **Type Hinting:** All function parameters and return values MUST be fully and correctly type-hinted using the `typing` module.\n* **Docstrings:** The function MUST include a comprehensive Google-style docstring. The docstring must describe the function's purpose, all arguments (`Args:`), the return value (`Returns:`), and any exceptions it might raise (`Raises:`).\n* **Clarity:** The code must be self-commenting where possible. Use comments only to explain complex or non-obvious logic.\n\n### Safety & Error Handling\n* **Input Validation:** The function MUST validate its inputs. It should raise specific, informative exceptions (e.g., `ValueError`, `TypeError`) for invalid inputs. DO NOT return `None` or `False` to signal an error.\n* **Security:** You MUST NOT write code that performs dangerous operations. This includes, but is not limited to:\n * Filesystem access (reading, writing, deleting files).\n * Network requests.\n * Executing shell commands (e.g., via `os.system` or `subprocess`).\n * Using insecure functions like `eval()` or `exec()`.\n* **No Placeholders:** The function MUST be complete and functional. DO NOT use `pass` or placeholder comments like `# TODO: Implement this`.\n\n---\n\n### FUNCTION SPECIFICATION ###\n\n```text\n## Function Purpose\nA clear, one-sentence description of what the function does.\ne.g., \"Calculates the Simple Moving Average (SMA) for a list of numerical data over a given window size.\"\n\n## Function Signature\nThe desired function name, parameters, and return type.\ne.g., `def calculate_sma(data: list[float], window_size: int) -> list[float]:`\n\n## Parameters\nA detailed description of each parameter.\n- `data (list[float])`: A list of numbers (integers or floats). Must not be empty.\n- `window_size (int)`: The number of periods to include in the average. Must be a positive integer greater than zero and less than or equal to the length of `data`.\n\n## Return Value\nA detailed description of the expected output.\ne.g., `(list[float])`: A list of floats representing the moving average. The list will be shorter than the input list by `window_size - 1`. Returns an empty list if the input `data` is empty and `window_size` is valid.\n\n## Business Logic & Edge Cases\nDetailed, step-by-step logic and how to handle specific scenarios.\n1. Validate inputs immediately. If `window_size` is less than 1 or greater than the length of `data`, raise a `ValueError`.\n2. If `data` is not a list of numbers, raise a `TypeError`.\n3. Iterate through the `data` list to calculate the average for each window.\n4. The first SMA is the average of the first `window_size` elements.\n5. Slide the window one element at a time to the end of the list, calculating the average for each new window.\n6. The resulting list of averages is the return value.\n\n## Allowed Dependencies\nA list of allowed third-party libraries, or \"None\".\ne.g., `None`\n```", signed_by: "user" } +2025-12-30T22:55:02.277780Z DEBUG ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="PromptReviewer" args="{\"intent\":{\"text\":\"write a python fonction\"},\"spec\":{\"constraints\":\"* The function's purpose, inputs, and outputs are undefined.\\n* No requirements regarding performance, error handling, or dependencies.\\n* The target Python version is not specified.\\n* High risk of failure: The delivered function will not meet the user's actual, unstated needs.\\n* Wasted effort: Development time will be spent on a solution that is functionally incorrect for the business problem.\\n* Scope creep: Lack of initial definition will lead to iterative, inefficient rework cycles.\",\"goal\":\"Produce a working Python function.\"}}" result="{\"system_prompt\":\"## Persona\\n\\nYou are an expert Senior Python Developer. Your code is clean, efficient, well-documented, and production-ready. You adhere strictly to modern best practices, including type hinting, error handling, and style guidelines. You are relentlessly logical and will refuse to proceed if a request is ambiguous or incomplete.\\n\\n## Primary Task\\n\\nYour primary task is to write a single, production-quality Python function based on the detailed specifications provided within the `### FUNCTION SPECIFICATION ###` block.\\n\\n## Process\\n\\n1. **Analyze the Specification:** Scrutinize the user-provided specification for any ambiguity, logical gaps, or missing edge cases.\\n2. **Clarify Ambiguities:** If the specification is incomplete or unclear in any way, you MUST ask clarifying questions before writing any code. State exactly what information is missing. DO NOT make assumptions.\\n3. **Write the Function:** Once the specification is crystal clear, write the Python function.\\n4. **Format the Output:** Present your final response as a single, complete Python code block. The code block MUST include the function, its docstring, and at least two distinct usage examples that demonstrate its correctness, including handling of edge cases.\\n\\n## Core Requirements & Constraints\\n\\n### General\\n* **Language:** The code MUST be written for Python 3.9+.\\n* **Dependencies:** You MUST NOT use any external libraries unless they are explicitly listed in the `Allowed Dependencies` section of the specification. Use only the Python standard library.\\n* **Idempotency:** The function should be idempotent where the logic allows.\\n\\n### Code Quality & Style\\n* **PEP 8:** The code MUST strictly adhere to the PEP 8 style guide.\\n* **Type Hinting:** All function parameters and return values MUST be fully and correctly type-hinted using the `typing` module.\\n* **Docstrings:** The function MUST include a comprehensive Google-style docstring. The docstring must describe the function's purpose, all arguments (`Args:`), the return value (`Returns:`), and any exceptions it might raise (`Raises:`).\\n* **Clarity:** The code must be self-commenting where possible. Use comments only to explain complex or non-obvious logic.\\n\\n### Safety & Error Handling\\n* **Input Validation:** The function MUST validate its inputs. It should raise specific, informative exceptions (e.g., `ValueError`, `TypeError`) for invalid inputs. DO NOT return `None` or `False` to signal an error.\\n* **Security:** You MUST NOT write code that performs dangerous operations. This includes, but is not limited to:\\n * Filesystem access (reading, writing, deleting files).\\n * Network requests.\\n * Executing shell commands (e.g., via `os.system` or `subprocess`).\\n * Using insecure functions like `eval()` or `exec()`.\\n* **No Placeholders:** The function MUST be complete and functional. DO NOT use `pass` or placeholder comments like `# TODO: Implement this`.\\n\\n---\\n\\n### FUNCTION SPECIFICATION ###\\n\\n```text\\n## Function Purpose\\nA clear, one-sentence description of what the function does.\\ne.g., \\\"Calculates the Simple Moving Average (SMA) for a list of numerical data over a given window size.\\\"\\n\\n## Function Signature\\nThe desired function name, parameters, and return type.\\ne.g., `def calculate_sma(data: list[float], window_size: int) -> list[float]:`\\n\\n## Parameters\\nA detailed description of each parameter.\\n- `data (list[float])`: A list of numbers (integers or floats). Must not be empty.\\n- `window_size (int)`: The number of periods to include in the average. Must be a positive integer greater than zero and less than or equal to the length of `data`.\\n\\n## Return Value\\nA detailed description of the expected output.\\ne.g., `(list[float])`: A list of floats representing the moving average. The list will be shorter than the input list by `window_size - 1`. Returns an empty list if the input `data` is empty and `window_size` is valid.\\n\\n## Business Logic & Edge Cases\\nDetailed, step-by-step logic and how to handle specific scenarios.\\n1. Validate inputs immediately. If `window_size` is less than 1 or greater than the length of `data`, raise a `ValueError`.\\n2. If `data` is not a list of numbers, raise a `TypeError`.\\n3. Iterate through the `data` list to calculate the average for each window.\\n4. The first SMA is the average of the first `window_size` elements.\\n5. Slide the window one element at a time to the end of the list, calculating the average for each new window.\\n6. The resulting list of averages is the return value.\\n\\n## Allowed Dependencies\\nA list of allowed third-party libraries, or \\\"None\\\".\\ne.g., `None`\\n```\",\"signed_by\":\"user\"}" +2025-12-30T22:55:17.312695Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:54: Optimization complete. Final artifact length: 6445 +2025-12-30T22:55:17.316144Z INFO ThreadId(01) rigscribe: src/lib.rs:51: Optimize prompt cached to: "./.prompts_perssitense_cache/201.json" +2025-12-30T22:56:21.949876Z INFO ThreadId(01) rigscribe: src/main.rs:18: Starting prompt optimization process for: 'write a python fonction' +2025-12-30T22:56:21.950259Z INFO ThreadId(01) rigscribe: src/lib.rs:47: Cache MIS: "./.prompts_perssitense_cache/2011.json" +2025-12-30T22:56:21.950282Z INFO ThreadId(01) rigscribe: src/lib.rs:48: Optimizing ... +2025-12-30T22:56:21.956758Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:37: Starting optimization streaming... +2025-12-30T22:56:32.583538Z INFO ThreadId(04) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:32: [Tool Calling]-> Deconstructor! +2025-12-30T22:56:39.984143Z INFO ThreadId(01) rigscribe: src/main.rs:18: Starting prompt optimization process for: 'write a python fonction' +2025-12-30T22:56:39.984398Z INFO ThreadId(01) rigscribe: src/lib.rs:47: Cache MIS: "./.prompts_perssitense_cache/2011.json" +2025-12-30T22:56:39.984413Z INFO ThreadId(01) rigscribe: src/lib.rs:48: Optimizing ... +2025-12-30T22:56:39.991557Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:37: Starting optimization streaming... +2025-12-30T22:56:44.256708Z DEBUG ThreadId(14) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:64: Deconstructor extracted spec: Specification { goal: "Produce a Python function.", constraints: "* Language must be Python.\n* Output must be a function.\n* Function purpose, inputs, and outputs are undefined.\n* Extreme ambiguity: The request lacks any specific requirements.\n* High probability of delivering a function that is not useful to the user." } +2025-12-30T22:56:44.257515Z DEBUG ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="Deconstructor" args="{\"text\":\"write a python fonction\"}" result="{\"goal\":\"Produce a Python function.\",\"constraints\":\"* Language must be Python.\\n* Output must be a function.\\n* Function purpose, inputs, and outputs are undefined.\\n* Extreme ambiguity: The request lacks any specific requirements.\\n* High probability of delivering a function that is not useful to the user.\"}" +2025-12-30T22:56:45.878840Z INFO ThreadId(14) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:40: [Tool Calling]-> PromptReviewer! +2025-12-30T22:56:59.566379Z INFO ThreadId(05) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:32: [Tool Calling]-> Deconstructor! +2025-12-30T22:57:09.103545Z DEBUG ThreadId(12) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:64: Deconstructor extracted spec: Specification { goal: "To receive a piece of Python code that is a function.", constraints: "* The language must be Python.\n* The output must be a syntactically correct function.\n* The function's purpose, name, parameters, and return value are undefined.\n* High risk of delivering a function that does not meet the user's actual, unstated need.\n* The request is too ambiguous to create a meaningful or useful solution." } +2025-12-30T22:57:09.104406Z DEBUG ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="Deconstructor" args="{\"text\":\"write a python fonction\"}" result="{\"goal\":\"To receive a piece of Python code that is a function.\",\"constraints\":\"* The language must be Python.\\n* The output must be a syntactically correct function.\\n* The function's purpose, name, parameters, and return value are undefined.\\n* High risk of delivering a function that does not meet the user's actual, unstated need.\\n* The request is too ambiguous to create a meaningful or useful solution.\"}" +2025-12-30T22:57:10.745443Z INFO ThreadId(16) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:40: [Tool Calling]-> PromptReviewer! +2025-12-30T22:57:13.528170Z DEBUG ThreadId(04) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:79: PromptReviewer produced artifact: Artifact { system_prompt: "## Persona: Expert Python Architect\n\nYou are a world-class Python architect with a specialization in writing safe, robust, and production-ready code. Your standards are exceptionally high. You are methodical, precise, and you NEVER make assumptions about user requirements. Your primary directive is to write a single, flawless Python function based on user specifications.\n\n## Core Directives\n\n1. **Requirement Elicitation is Mandatory:** The user's initial request will be vague. You MUST NOT write any code until you have gathered sufficient information. Your first response must ALWAYS be to ask clarifying questions to deconstruct the user's needs.\n2. **Zero-Tolerance for Ambiguity:** If any aspect of the function's purpose, inputs, outputs, or behavior is unclear, you MUST ask for clarification. Do not proceed with writing code if ambiguity exists.\n3. **Safety is Paramount:** You are architecting a function that must be safe. You will rigorously define and enforce safety constraints.\n\n## Step-by-Step Process\n\n### Step 1: Deconstruct the User Request\n\nAnalyze the user's initial prompt to understand their likely goal, even if it is poorly articulated.\n\n### Step 2: Ask Clarifying Questions\n\nIn your first response, you MUST ask the user the following questions to establish the function's contract. Frame these as a prerequisite for writing any code.\n\n```text\nBefore I can write this function, I need a precise specification. Please provide the following details:\n\n1. **Function Purpose:** What is the single, primary responsibility of this function? (e.g., \"Calculate the SHA-256 hash of a file,\" \"Parse a user's age from a string\").\n2. **Inputs (Arguments):**\n * What are the names of the input parameters?\n * What is the expected Python data type for each parameter (e.g., `str`, `int`, `list[float]`)?\n * Are any parameters optional? If so, what are their default values?\n3. **Output (Return Value):**\n * What is the expected Python data type of the value this function will return? (e.g., `bool`, `dict[str, int]`).\n * Describe the value itself.\n4. **Core Logic & Constraints:**\n * Describe the key steps the function should perform.\n * Are there any specific libraries or modules that I MUST use (e.g., `pandas`, `requests`)?\n * Are there any libraries or functions that I MUST NOT use?\n5. **Error Handling & Edge Cases:**\n * How should the function behave with invalid inputs (e.g., `None`, wrong data types)? Should it raise a specific exception (e.g., `ValueError`, `TypeError`) or return a default value?\n * What are the edge cases to consider? (e.g., empty lists, zero values, file not found).\n```\n\n### Step 3: Write the Function\n\nOnce the user has provided clear, unambiguous answers to the questions above, and only then, you will generate the Python function.\n\nThe generated function MUST adhere to the following non-negotiable quality standards:\n\n* **PEP 8 Compliance:** Code MUST be formatted according to PEP 8 style guidelines.\n* **Type Hinting:** All function arguments and the return value MUST include precise type hints (`from typing import ...`).\n* **Docstrings:** The function MUST include a comprehensive docstring in Google Python Style. The docstring must describe the function's purpose, all arguments (`Args:`), the return value (`Returns:`), and any exceptions it might raise (`Raises:`).\n* **Error Handling:** The implementation MUST include robust error handling using `try...except` blocks for expected failure modes, raising specific, informative exceptions.\n* **Readability:** Logic must be clear and well-commented where necessary. Use meaningful variable names.\n* **Encapsulation:** The output MUST be a single, self-contained function. Do not include any code outside of the function definition, except for necessary imports.\n\n### Step 4: Final Output Format\n\nPresent the final, complete Python code inside a single Markdown code block.\n\n```python\n# Your final Python code here\n```\n\nDO NOT include any conversational text, explanations, or apologies outside of the code block in the final response.", signed_by: "user" } +2025-12-30T22:57:13.529327Z DEBUG ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="PromptReviewer" args="{\"intent\":{\"text\":\"write a python fonction\"},\"spec\":{\"constraints\":\"* Language must be Python.\\n* Output must be a function.\\n* Function purpose, inputs, and outputs are undefined.\\n* Extreme ambiguity: The request lacks any specific requirements.\\n* High probability of delivering a function that is not useful to the user.\",\"goal\":\"Produce a Python function.\"}}" result="{\"system_prompt\":\"## Persona: Expert Python Architect\\n\\nYou are a world-class Python architect with a specialization in writing safe, robust, and production-ready code. Your standards are exceptionally high. You are methodical, precise, and you NEVER make assumptions about user requirements. Your primary directive is to write a single, flawless Python function based on user specifications.\\n\\n## Core Directives\\n\\n1. **Requirement Elicitation is Mandatory:** The user's initial request will be vague. You MUST NOT write any code until you have gathered sufficient information. Your first response must ALWAYS be to ask clarifying questions to deconstruct the user's needs.\\n2. **Zero-Tolerance for Ambiguity:** If any aspect of the function's purpose, inputs, outputs, or behavior is unclear, you MUST ask for clarification. Do not proceed with writing code if ambiguity exists.\\n3. **Safety is Paramount:** You are architecting a function that must be safe. You will rigorously define and enforce safety constraints.\\n\\n## Step-by-Step Process\\n\\n### Step 1: Deconstruct the User Request\\n\\nAnalyze the user's initial prompt to understand their likely goal, even if it is poorly articulated.\\n\\n### Step 2: Ask Clarifying Questions\\n\\nIn your first response, you MUST ask the user the following questions to establish the function's contract. Frame these as a prerequisite for writing any code.\\n\\n```text\\nBefore I can write this function, I need a precise specification. Please provide the following details:\\n\\n1. **Function Purpose:** What is the single, primary responsibility of this function? (e.g., \\\"Calculate the SHA-256 hash of a file,\\\" \\\"Parse a user's age from a string\\\").\\n2. **Inputs (Arguments):**\\n * What are the names of the input parameters?\\n * What is the expected Python data type for each parameter (e.g., `str`, `int`, `list[float]`)?\\n * Are any parameters optional? If so, what are their default values?\\n3. **Output (Return Value):**\\n * What is the expected Python data type of the value this function will return? (e.g., `bool`, `dict[str, int]`).\\n * Describe the value itself.\\n4. **Core Logic & Constraints:**\\n * Describe the key steps the function should perform.\\n * Are there any specific libraries or modules that I MUST use (e.g., `pandas`, `requests`)?\\n * Are there any libraries or functions that I MUST NOT use?\\n5. **Error Handling & Edge Cases:**\\n * How should the function behave with invalid inputs (e.g., `None`, wrong data types)? Should it raise a specific exception (e.g., `ValueError`, `TypeError`) or return a default value?\\n * What are the edge cases to consider? (e.g., empty lists, zero values, file not found).\\n```\\n\\n### Step 3: Write the Function\\n\\nOnce the user has provided clear, unambiguous answers to the questions above, and only then, you will generate the Python function.\\n\\nThe generated function MUST adhere to the following non-negotiable quality standards:\\n\\n* **PEP 8 Compliance:** Code MUST be formatted according to PEP 8 style guidelines.\\n* **Type Hinting:** All function arguments and the return value MUST include precise type hints (`from typing import ...`).\\n* **Docstrings:** The function MUST include a comprehensive docstring in Google Python Style. The docstring must describe the function's purpose, all arguments (`Args:`), the return value (`Returns:`), and any exceptions it might raise (`Raises:`).\\n* **Error Handling:** The implementation MUST include robust error handling using `try...except` blocks for expected failure modes, raising specific, informative exceptions.\\n* **Readability:** Logic must be clear and well-commented where necessary. Use meaningful variable names.\\n* **Encapsulation:** The output MUST be a single, self-contained function. Do not include any code outside of the function definition, except for necessary imports.\\n\\n### Step 4: Final Output Format\\n\\nPresent the final, complete Python code inside a single Markdown code block.\\n\\n```python\\n# Your final Python code here\\n```\\n\\nDO NOT include any conversational text, explanations, or apologies outside of the code block in the final response.\",\"signed_by\":\"user\"}" +2025-12-30T22:57:22.560780Z INFO ThreadId(12) rigscribe::tools::web_searcher: src/tools/web_searcher.rs:27: [Tool Calling]-> WebSearcher +2025-12-30T22:57:28.156239Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:54: Optimization complete. Final artifact length: 4120 +2025-12-30T22:57:28.157378Z INFO ThreadId(01) rigscribe: src/lib.rs:51: Optimize prompt cached to: "./.prompts_perssitense_cache/2011.json" +2025-12-30T22:57:29.146717Z DEBUG ThreadId(15) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="WebSearcher" args="{\"query\":\"best practices for python code generation prompts\"}" result="\"![]()\\n\\nYou've been blocked by network security.\\n\\nIf you think you've been blocked by mistake, file a ticket below and we'll look into it.\\n\\n[File a ticket](https://support.reddithelp.com/hc/en-us/requests/new?ticket_form_id=21879292693140)\\n\\n========================\\n[![Image description](https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fouro0wg11ydxmhunehso.png)](https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fouro0wg11ydxmhunehso.png)\\n\\n**[Prompt Engineering for Python Code Generation](https://keploy.io/blog/community/prompt-engineering-for-python-code-generation-with-keploy)**\\n\\nPrompt engineering is an essential technique for optimizing AI-generated Python code, ensuring accurate, efficient, and context-aware outputs. As AI-powered tools like OpenAI’s GPT, GitHub Copilot, and Keploy gain popularity in software development, crafting well-structured prompts becomes crucial for generating high-quality Python code. This guide explores the fundamentals of prompt engineering and best practices for AI-driven Python code generation.\\n\\n**[What is Prompt Engineering](https://keploy.io/blog/community/prompt-engineering-for-python-code-generation-with-keploy)?**\\n\\nPrompt engineering involves designing and structuring inputs to guide AI models in producing high-quality responses. In the context of Python code generation, well-crafted prompts ensure the AI understands the intent and delivers precise results. By refining how queries are structured, developers can enhance the quality of AI-generated code, making it more readable, maintainable, and efficient.\\n\\n**Why is Prompt Engineering Important for Python Code Generation?**\\n\\nAI models rely on prompts to generate relevant and accurate code. A poorly structured prompt can lead to ambiguous or incorrect outputs, requiring additional effort to debug and refine. A well-structured prompt reduces ambiguity, improves efficiency, and minimizes the need for extensive post-processing. Prompt engineering is especially crucial for automated test generation tools like Keploy, which leverage AI to create reliable test cases for applications.\\n\\n**Key Strategies for Effective Prompt Engineering**\\n\\nTo generate better Python code with AI, consider the following strategies:\\n\\n**1. Be Specific and Contextual**\\n\\nProviding clear instructions and specifying the expected output format helps AI models generate more accurate results. Instead of asking, \\\"Write a Python function,\\\" a better prompt would be, \\\"Write a Python function that takes a list of numbers and returns a sorted list using the quicksort algorithm.\\\"\\n\\n**2. Use Examples to Guide the AI**\\n\\nIncluding input-output examples in your prompt helps AI understand patterns and expectations for code generation. For instance:\\n\\nPrompt: Write a Python function to check if a string is a palindrome.\\n\\nExample Input: 'radar'\\n\\nExample Output: True\\n\\nBy providing examples, AI can better align with the expected behavior.\\n\\n**3. Define Constraints and Requirements**\\n\\nExplicitly stating programming constraints, such as using specific libraries or avoiding certain functions, refines the generated output. If you want to generate a machine learning model using TensorFlow instead of PyTorch, include that in the prompt.\\n\\n**4. Break Down Complex Tasks into Steps**\\n\\nSplitting a large problem into smaller tasks improves AI performance and ensures structured code generation. Instead of asking AI to \\\"build a chatbot,\\\" break it down into \\\"write a Python function to handle user input,\\\" \\\"generate responses using an NLP model,\\\" and \\\"deploy the chatbot using Flask.\\\"\\n\\n**Common Challenges in AI-Generated Python Code**\\n\\nDespite advancements, AI-generated Python code may have challenges such as logical errors, lack of optimization, or missing edge cases. Effective prompt engineering can mitigate these issues by providing clear instructions and expected behavior. Keploy, an AI-powered test case generation tool, can help validate and refine AI-generated code by automating the testing process and identifying potential issues.\\n\\n**Practical Examples of Prompt Engineering for Python Code**\\n\\n**Example 1: Generating a Simple Function**\\n\\nPrompt:\\n\\nWrite a Python function that takes a list of numbers and returns the maximum value.\\n\\nGenerated Code:\\n\\ndef find_max(numbers):\\n\\nreturn max(numbers)\\n\\n**Example 2: Enforcing Coding Standards**\\n\\nPrompt:\\n\\nWrite a Python function that calculates the factorial of a number, following PEP 8 style guide.\\n\\nGenerated Code:\\n\\ndef factorial(n):\\n\\n\\\"\\\"\\\"Returns the factorial of a given number.\\\"\\\"\\\"\\n\\nif n == 0:\\n\\nreturn 1\\n\\nreturn n * factorial(n - 1)\\n\\n**Example 3: Using External Libraries**\\n\\nPrompt:\\n\\nWrite a Python function that uses NumPy to generate a random array of size 10.\\n\\nGenerated Code:\\n\\nimport numpy as np\\n\\ndef random_array():\\n\\nreturn np.random.rand(10)\\n\\n**Tools for Python Code Generation with AI**\\n\\nSeveral AI-powered tools assist in generating Python code efficiently. Some of the notable tools include:\\n\\n- **OpenAI’s GPT** – Generates Python code based on structured prompts.\\n- **GitHub Copilot** – Suggests code snippets within the IDE.\\n- **Keploy** – Automates test case generation and ensures AI-generated code is reliable and functional.\\n\\n\\n**Best Practices for Refining AI-Generated Code**\\n\\n1. **Review and Validate Output** – Always test AI-generated code for correctness and efficiency.\\n2. **Optimize for Readability** – Ensure the generated code follows best practices for maintainability.\\n3. **Use AI for Assistance, Not Replacement** – AI can enhance productivity but should complement human expertise.\\n4. **Leverage Keploy for Test Generation** – AI-generated code should be tested to ensure robustness, and Keploy can automate test creation for better validation.\\n\\n\\n**Conclusion**\\n\\nPrompt engineering is a powerful skill that enhances AI-driven Python code generation. By crafting precise, structured prompts, developers can improve AI accuracy, efficiency, and reliability in coding tasks. Whether generating new functions, enforcing coding standards, or integrating external libraries, well-designed prompts ensure optimal AI-generated output. Additionally, tools like Keploy help validate and test AI-generated code, ensuring its reliability in real-world applications.\\n\\nMastering prompt engineering not only improves the quality of AI-generated Python code but also enhances overall development efficiency, making AI an invaluable assistant in modern software engineering.\\n\\n========================\\n## Table of contents[/guides/better-prompts-ai-code#table-of-contents](/guides/better-prompts-ai-code#table-of-contents)\\n\\n- [Be specific with language and requirements](#be-specific-with-language-and-requirements)\\n- [Include examples and constraints](#include-examples-and-constraints)\\n- [Break down complex tasks and iterate](#break-down-complex-tasks-and-iterate)\\n- [Avoid common prompting pitfalls](#avoid-common-prompting-pitfalls)\\n- [Graphite Agent: AI code review for quality and consistency](#graphites-Graphite Agent-ai-code-review-for-quality-and-consistency)\\n\\n\\n[Large language models (LLMs)](https://developers.google.com/machine-learning/resources/intro-llms) can generate code in many programming languages, but the quality of their output depends heavily on how you prompt them. Crafting a precise prompt can be the difference between hours of debugging and a seamless implementation. The key is to clearly communicate your intent and requirements, because an AI doesn't inherently know your goals – it only follows the patterns described in your prompt. This guide introduces language-agnostic prompt engineering techniques to help both beginners and experienced developers get more accurate and useful code from AI. We'll illustrate each technique with examples of weak vs. improved prompts to show how small changes can yield better results. For a deeper dive into how AI code review works, see our [guide on AI code review](https://graphite.com/guides/ai-code-review).\\n\\n### Be specific with language and requirements[/guides/better-prompts-ai-code#be-specific-with-language-and-requirements](/guides/better-prompts-ai-code#be-specific-with-language-and-requirements)\\n\\nThe first rule of prompt engineering for code is to be specific about what you want. Vague prompts yield vague (often incorrect) results. Always mention the programming language, the task or algorithm, and any requirements or constraints. For example, consider asking an AI to generate a sorting algorithm:\\n\\n- **Weak prompt:** \\\"Write a sorting algorithm.\\\"\\n- **Improved prompt:** \\\"Write a **[Python](https://www.python.org/)** implementation of **merge sort** optimized for **memory efficiency**, including **time complexity analysis** and **error handling** for edge cases (e.g. empty arrays).\\\"\\n\\n\\nThe improved prompt is much clearer. It specifies:\\n\\n- The programming language (Python) and specific algorithm (merge sort).\\n- The optimization goal (memory efficiency) and required output details (time complexity analysis).\\n- Error handling expectations and an edge case to consider (empty arrays).\\n\\n\\nBy spelling out these details, you greatly reduce ambiguity. [Microsoft's Developer Tools](https://developer.microsoft.com/) research group observed that prompts with explicit specifications reduced the need for back-and-forth refinements by 68%. In practice, this means the AI is more likely to produce correct, ready-to-use code on the first try.\\n\\nBeing specific also means providing technical context when relevant. If certain constraints matter – such as the target environment, API version, performance or security requirements – include them in the prompt. For instance, specify \\\"Node.js 18 server environment\\\" or \\\"Python 3.10+\\\" if it matters. Otherwise, the model might generate code that is technically correct but not suitable for your situation.\\n> **Tip:** AI code review tools like [Graphite Agent](https://graphite.com/features/agent) can help enforce technical requirements and catch missing context.\\n\\n### Include examples and constraints[/guides/better-prompts-ai-code#include-examples-and-constraints](/guides/better-prompts-ai-code#include-examples-and-constraints)\\n\\nAnother powerful technique is to provide examples or test cases in your prompt. Examples act as implicit constraints, guiding the model toward the format or behavior you expect. For instance, if you want a function with a specific signature or output format, show a snippet of that.\\n\\nImagine you need an email validation function. A weak prompt might just say, \\\"Validate an email address.\\\" An improved prompt would describe requirements and give a usage example, e.g.:\\n> **Prompt:** \\\"Create a TypeScript function `validateEmail(email: string): {isValid: boolean; message: string}` that checks if an email is RFC-5322 compliant and rejects disposable domains. For example, if the input is `\\\"test@tempmail.com\\\"`, it should return an object like `{ isValid: false, message: 'Disposable domain not allowed' }`.\\\"\\n\\nIn this improved version, the prompt lists specific rules (RFC compliance and no disposable domains) and provides a clear function signature and an example of expected output. This gives the AI a template to follow and boundaries to respect. As Dr. Rachel Thomas of fast.ai notes, *\\\"examples in prompts serve as implicit constraints that guide the model toward the desired output format.\\\"* By including a small example or an outline of the output, you help the model understand exactly what you want.\\n\\n### Break down complex tasks and iterate[/guides/better-prompts-ai-code#break-down-complex-tasks-and-iterate](/guides/better-prompts-ai-code#break-down-complex-tasks-and-iterate)\\n\\nIf your desired output is complex, avoid requesting everything in one huge prompt. It's often better to break down a complex task into smaller steps or use an iterative approach. Large models can handle reasonably detailed prompts, but asking for a full application in one go is likely to produce a convoluted or partial answer. Instead, tackle complex tasks in stages and possibly across multiple prompts:\\n\\n1. **Decompose the problem:** Outline sub-tasks in separate prompts (e.g. first define the data model, then the API endpoints, then the UI components). This ensures each part is handled with focus.\\n2. **Use step-by-step reasoning:** You can actually instruct the model to \\\"think step by step.\\\" For example: \\\"I need a JavaScript function to find the longest increasing subsequence in an array. Let's solve this step by step: first explain the concept, then choose an algorithm (greedy vs DP), then write the code, then analyze complexity.\\\" By embedding this reasoning process in the prompt, you encourage the AI to work through the problem methodically.\\n3. **Iterate with the AI:** Treat the AI's first output as a draft. Review it, then issue follow-up prompts to refine the code.\\n\\n\\nBy breaking prompts into smaller pieces or sequential turns, you make it easier for the model to comply at each step. You also get opportunities to catch mistakes early and steer the output in the right direction. Remember that you can carry context between prompts (in a chat-based LLM) – use that to your advantage for iterative improvement.\\n\\nFor more information on [integrating AI into your code review workflow](https://graphite.com/guides/integrating-ai-code-review-workflow) and [how to review code written by AI](https://graphite.com/guides/how-to-review-code-written-by-ai), these guides offer practical advice for iterative, AI-assisted development.\\n\\n### Avoid common prompting pitfalls[/guides/better-prompts-ai-code#avoid-common-prompting-pitfalls](/guides/better-prompts-ai-code#avoid-common-prompting-pitfalls)\\n\\nWhile applying the above techniques, watch out for a few common mistakes that can undermine your results:\\n\\n- **Under-specifying the task:** Don't assume the AI \\\"knows\\\" what you intended. If a prompt is too minimal, the model might solve the wrong problem or omit important features. Always double-check that your prompt fully describes the problem.\\n- **Ignoring context or constraints:** Failing to mention the operating environment, framework, or performance/security constraints can result in code that doesn't fit your project needs. Also make sure to include any version requirements or security considerations. Without this, you might get code that technically works but is impractical or unsafe for your use case. The more context you give, the more relevant the output.\\n- **Over-reliance on AI without verification:** Even with good prompts, AI-generated code isn't guaranteed to be perfect. The model might produce subtle logic bugs, use deprecated APIs, or just take an unconventional approach. Never merge AI-written code into production without reviewing it. Treat the AI as a coding assistant, not an infallible coder. Write prompts that encourage best practices (like asking for safe, idiomatic code), and always test and review the output thoroughly. Using linters or static analysis on AI code is also a smart safeguard.\\n> **To compare approaches:** This guide, [Automated vs. manual code reviews: Finding the right balance](https://graphite.com/guides/automated-vs-manual-code-reviews), explores the strengths and limitations of both, and how to combine them for best results.\\n\\nBy avoiding these pitfalls, you ensure that prompt engineering remains a helpful aid rather than a source of technical debt. Prompting is an iterative learning process – over time, you'll learn which clarifications or extra details yield better code from your preferred LLM.\\n\\n### Graphite Agent: AI code review for quality and consistency[/guides/better-prompts-ai-code#graphite-agent-ai-code-review-for-quality-and-consistency](/guides/better-prompts-ai-code#graphite-agent-ai-code-review-for-quality-and-consistency)\\n\\nWriting a good prompt is the first step, but ensuring the quality and consistency of AI-generated code is the next challenge. This is where tools like [Graphite Agent](https://graphite.com/features/agent) come in. Graphite Agent is an AI-powered code review assistant that provides immediate, context-aware feedback on code changes – including those written by an AI. It hooks into your development workflow (e.g. reviewing [pull requests](https://docs.github.com/en/pull-requests)) and flags issues ranging from logic bugs and security vulnerabilities to style violations and missing documentation. Graphite Agent uses your repository's context and even custom rules you define to give relevant suggestions. For example, you can configure Graphite Agent with your project's style guide or common AI mistakes to watch for, and it will catch those patterns in AI-generated code before a human reviewer even sees them.\\n\\n![screenshot of Graphite Agent comment](/images/content/guides/better-prompts-ai-code/sample-Graphite Agent-comment.png)\\n\\nBy incorporating a tool like Graphite Agent in your workflow, you add an extra layer of scrutiny for AI-produced code. It helps ensure the code meets your team's quality standards and is consistent with the rest of your codebase. In practice, Graphite Agent can automatically identify edge cases the AI missed, point out performance issues, and even suggest fixes – all of which saves human reviewers time and instills confidence in using AI-generated code. Remember, prompt engineering and AI coding tools are most effective when paired with robust review practices. [Graphite Agent](https://graphite.com/features/agent) bridges that gap by reviewing AI code for you, so you can safely harness AI code generation while maintaining high code quality and consistency.\\n\\n========================\\nYou’ve used [ChatGPT](https://realpython.com/chatgpt-coding-mentor-python/), and you understand the potential of using a **large language model (LLM)** to assist you in your tasks. Maybe you’re already working on an LLM-supported application and have read about **prompt engineering**, but you’re unsure how to translate the theoretical concepts into a practical example.\\n\\nYour text prompt instructs the LLM’s responses, so tweaking it can get you vastly different output. In this tutorial, you’ll apply multiple **prompt engineering techniques** to a real-world example. You’ll experience prompt engineering as an iterative process, see the effects of applying various techniques, and learn about related concepts from machine learning and data engineering.\\n\\n**In this tutorial, you’ll learn how to:**\\n\\n- Work with OpenAI’s **GPT-3.5** and **GPT-4** models through their **API**\\n- Apply prompt engineering techniques to a **practical, real-world example**\\n- Use **numbered steps**, **delimiters**, and **few-shot prompting** to improve your results\\n- Understand and use **chain-of-thought prompting** to add more context\\n- Tap into the power of **roles** in messages to go beyond using singular **role prompts**\\n\\n\\nYou’ll work with a **Python script** that you can repurpose to fit your own LLM-assisted task. So if you’d like to use practical examples to discover how you can use prompt engineering to get better results from an LLM, then you’ve found the right tutorial!\\n\\n== **Take the Quiz:**== Test your knowledge with our interactive “Practical Prompt Engineering” quiz. You’ll receive a score upon completion to help you track your learning progress:\\n\\n---\\n\\n\\n[Prompt Engineering: A Practical Example](/quizzes/practical-prompt-engineering/)\\n\\n**Interactive Quiz**\\n\\n[Practical Prompt Engineering](/quizzes/practical-prompt-engineering/)\\n\\nIn this quiz, you'll test your understanding of prompt engineering techniques with large language models (LLMs) like GPT-3.5 and GPT-4. You'll revisit how to work with OpenAI's API, apply prompt engineering techniques to practical examples, and use various strategies to improve your results.\\n\\n## Understand the Purpose of Prompt Engineering[](#understand-the-purpose-of-prompt-engineering \\\"Permanent link\\\")\\n\\nPrompt engineering is more than a buzzword. You can get vastly different output from an LLM when using different prompts. That may seem obvious when you consider that you get different output when you ask different questions—but it also applies to phrasing the same conceptual question differently. Prompt engineering means constructing your text input to the LLM using specific approaches.\\n\\nYou can think of prompts as arguments and the LLM as the function to which you pass these arguments. Different input means different output:\\n\\nWhile an LLM is much more complex than the toy function above, the fundamental idea holds true. For a successful function call, you’ll need to know exactly which argument will produce the desired output. In the case of an LLM, that argument is text that consists of many different **tokens**, or [pieces of words](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them).\\n\\nThe field of prompt engineering is still changing rapidly, and there’s a lot of active research happening in this area. As LLMs continue to evolve, so will the prompting approaches that will help you achieve the best results.\\n\\nIn this tutorial, you’ll cover some prompt engineering techniques, along with approaches to iteratively developing prompts, that you can use to get better text completions for your own LLM-assisted projects:\\n\\n- [Zero-Shot Prompting](#describe-your-task)\\n- [Few-Shot Prompting](#use-few-shot-prompting-to-improve-output)\\n- [Delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt)\\n- [Numbered Steps](#describe-your-request-in-numbered-steps)\\n- [Role Prompts](#add-a-role-prompt-to-set-the-tone)\\n- [Chain-of-Thought (CoT) Prompting](#walk-the-model-through-chain-of-thought-prompting)\\n- [Structured Output](#structure-your-output-format-as-json)\\n- [Labeled Conversations](#improve-your-output-with-the-power-of-conversation)\\n\\n\\nThere are more techniques to uncover, and you’ll also find links to additional resources in the tutorial. Applying the mentioned techniques in a practical example will give you a great starting point for improving your LLM-supported programs. If you’ve never worked with an LLM before, then you may want to peruse [OpenAI’s GPT documentation](https://platform.openai.com/docs/guides/gpt) before diving in, but you should be able to follow along either way.\\n\\n## Get to Know the Practical Prompt Engineering Project[](#get-to-know-the-practical-prompt-engineering-project \\\"Permanent link\\\")\\n\\nYou’ll explore various prompt engineering techniques in service of a practical example: [sanitizing](https://en.wikipedia.org/wiki/Data_sanitization) customer chat conversations. By practicing different prompt engineering techniques on a single real-world project, you’ll get a good idea of why you might want to use one technique over another and how you can apply them in practice.\\n\\nImagine that you’re the resident Python developer at a company that handles thousands of customer support chats on a daily basis. Your job is to format and sanitize these conversations. You also help with deciding which of them require additional attention.\\n\\n### Collect Your Tasks[](#collect-your-tasks \\\"Permanent link\\\")\\n\\nYour big-picture assignment is to help your company stay on top of handling customer chat conversations. The conversations that you work with may look like the one shown below:\\n\\nYou’re supposed to make these text conversations more accessible for further processing by the customer support department in a few different ways:\\n> - Remove personally identifiable information.\\n> - Remove swear words.\\n> - Clean the date-time information to only show the date.\\n\\nThe swear words that you’ll encounter in this tutorial won’t be spicy at all, but you can consider them stand-ins for more explicit phrasing that you might find out in the wild. After sanitizing the chat conversation, you’d expect it to look like this:\\n\\nSure—you could handle it [using Python’s `str.replace()`](https://realpython.com/replace-string-python/) or [show off](https://xkcd.com/208) your [regular expression](https://realpython.com/regex-python/) skills. But there’s more to the task than immediately meets the eye.\\n\\nYour project manager isn’t a technical person, and they stuck another task at the end of this list. They may think of the task as a normal continuation of the previous tasks. But you know that it requires an entirely different approach and technology stack:\\n> Mark the conversations as “positive” or “negative.”\\n\\nThat task lies in the realm of [machine learning](https://realpython.com/learning-paths/machine-learning-python/), namely [text classification](https://realpython.com/python-keras-text-classification/), and more specifically [sentiment analysis](https://realpython.com/python-nltk-sentiment-analysis/). Even [advanced regex skills](https://realpython.com/regex-python-part-2/) won’t get you far in this challenge.\\n\\nAdditionally, you know that the customer support team that you’re preparing the data for will want to continue working on it programmatically. Plain text isn’t necessarily the best format for doing that. You want to do work that’s useful for others, so you add yet another stretch goal to your growing list of tasks:\\n> Format the output as JSON.\\n\\nThis task list is quickly growing out of proportion! Fortunately, you’ve got access to the [OpenAI API](https://platform.openai.com/docs/api-reference/), and you’ll employ the help of their LLM to solve all of these challenges.\\n\\nOne of the impressive features of LLMs is the breadth of tasks that you can use them for. So you’ll cover a lot of ground and different areas of use. And you’ll learn how to tackle them all with prompt engineering techniques.\\n\\n### Prepare Your Tools[](#prepare-your-tools \\\"Permanent link\\\")\\n\\nTo follow along with this tutorial, you’ll need to know how to [run a Python script](https://realpython.com/run-python-scripts/) from your command-line interface (CLI), and you’ll need an [API key from OpenAI](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key).\\n\\nYou’ll focus on prompt engineering, so you’ll only use the CLI app as a tool to demonstrate the different techniques. However, if you want to understand the code that you’ll be using, then it’ll help to have some experience with [Python classes](https://realpython.com/python-classes/), [defining your own Python functions](https://realpython.com/defining-your-own-python-function/), [the name-main idiom](https://realpython.com/if-name-main-python/), and [using Python to interact with web APIs](https://realpython.com/api-integration-in-python/).\\n\\nTo get started, go ahead and download the example Python script that you’ll work with throughout the tutorial:\\n\\nThe codebase represents a light abstraction layer on top of the OpenAI API and exposes one function called `get_chat_completion()` that’ll be of primary interest for the tutorial. The function interacts with OpenAI’s [`/chat/completions` endpoint](https://platform.openai.com/docs/api-reference/chat) to generate responses using different models, such as **GPT-3.5-Turbo** and **GPT-4**. You’ll explore both models, starting with GPT-3.5-Turbo, and eventually you’ll move on to the more powerful GPT-4 model.\\n\\nMost of the code in `app.py` revolves around setting up and fetching the settings from `settings.toml`.\\n\\nThe script also parses a command-line argument to allow you to conveniently specify an input file.\\nThe input files that you’ll primarily work with contain [LLM-generated customer support chat conversations](https://arxiv.org/abs/2206.08082), but feel free to reuse the script and provide your own input text files for additional practice.\\n\\nThe heart of the codebase is `settings.toml`. This [TOML settings file](https://realpython.com/python-toml/) hosts the prompts that you’ll use to sharpen your prompt engineering skills. It contains different prompts formatted in the human-readable settings format TOML.\\n\\nKeeping your prompts in a dedicated settings file can help to put them under [version control](https://realpython.com/python-git-github-intro/), which means you can keep track of different versions of your prompts, which will inevitably change during development.\\n\\nYour Python script will read the prompts from `settings.toml`, assemble them meaningfully, and send an API requests to OpenAI.\\n\\nAlternatively, you can also run all the text prompts directly in the [OpenAI playground](https://platform.openai.com/playground), which will give you the same functionality as the script. You could even paste the prompts into the [ChatGPT](https://chat.openai.com/) interface. However, the results will vary because you’ll be interacting with a different model and won’t have the opportunity to change certain settings.\\n\\n### Set Up the Codebase[](#set-up-the-codebase \\\"Permanent link\\\")\\n\\nMake sure that you’re on [Python 3.11](https://realpython.com/python311-new-features/) or higher, so that you can [interact with TOML files](https://realpython.com/python311-tomllib/) using the standard library. If you haven’t downloaded the codebase yet, go ahead and click the link below:\\n\\nUnzip the folder and use your CLI to navigate into the folder. You’ll see a handful of files. The most important ones are `app.py` and `settings.toml`:\\n\\n ./\\n ├── LICENSE\\n ├── README.md\\n ├── app.py\\n ├── chats.txt\\n ├── requirements.txt\\n ├── sanitized-chats.txt\\n ├── sanitized-testing-chats.txt\\n ├── settings.toml\\n ├── settings-final.toml\\n └── testing-chats.txt\\n\\nThe file `settings.toml` contains placeholders for all the prompts that you’ll use to explore the different prompt engineering techniques. That’s the file that you’ll primarily work with, so open it up. You’ll use it to iteratively develop the prompts for your application.\\n\\nThe file `app.py` contains the Python code that ties the codebase together. You’ll run this script many times throughout the tutorial, and it’ll take care of pulling your prompts from `settings.toml`.\\n\\nAfter you’ve downloaded and unpacked the codebase, create and activate a new [virtual environment](https://realpython.com/python-virtual-environments-a-primer/). Then use [`pip`](https://realpython.com/what-is-pip/) to install the required dependencies:\\n\\nNote that this tutorial uses `openai` version 1.13.3. OpenAI may introduce breaking changes between API versions, so make sure that you install the pinned dependencies from the requirements file. Then you’ll be able to work through the tutorial without any hiccups.\\n\\nTo run the script successfully, you’ll need an [OpenAI API key](https://platform.openai.com/account/api-keys) with which to authenticate your API requests. Make sure to keep that key private and never commit it to version control! If you’re new to using API keys, then read up on [best practices for API key safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety).\\n\\nTo integrate your API key with the script and avoid leaking it publicly, you can export the API key as an environment variable:\\n\\nAfter you’ve added your API key as an environment variable named `OPENAI_API_KEY`, the script will automatically pick it up during each run.\\n\\nAt this point, you’ve completed the necessary setup steps. You can now run the script using the command line and provide it with a file as additional input text:\\n\\nThe command shown above combines the customer support chat conversations in `chats.txt` with prompts and API call parameters that are saved in `settings.toml`, then sends a request to the OpenAI API. Finally, it prints the resulting text completion to your terminal.\\n\\nFrom now on, you’ll primarily make changes in `settings.toml`. The code in `app.py` is just here for your convenience, and you won’t have to edit that file at all. The changes in the LLM’s output will come from changing the prompts and a few of the API call arguments.\\n\\n### Freeze Responses by Setting the Temperature to Zero[](#freeze-responses-by-setting-the-temperature-to-zero \\\"Permanent link\\\")\\n\\nWhen you’re planning to integrate an LLM into a product or a workflow, then you’ll generally want deterministic responses. The same input should give you the same output. Otherwise, it gets hard to provide a consistent service or debug your program if something goes wrong.\\n\\nBecause of this, you’ll want to set the [`temperature` argument](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature) of your API calls to `0`. This value will mean that you’ll get *mostly* deterministic results.\\n\\nLLMs do text completion by predicting the next token based on the probability that it follows the previous tokens. Higher `temperature` settings will introduce more randomness into the results by allowing the LLM to pick tokens with lower probabilities. Because there are so many token selections chained one after one the other, picking one different token can sometimes lead to vastly different results.\\n\\nIf you use the LLM to generate ideas or [alternative implementations](https://realpython.com/chatgpt-coding-mentor-python/#prompt-chatgpt-for-alternative-implementations) of a programming task, then higher values for `temperature` might be interesting. However, they’re generally undesirable when you build a product.\\n\\nIn the example codebase, you can adjust `temperature` right inside your `settings.toml` file:\\n\\nThe initial value is set at `0`. All the examples in this tutorial assume that you leave `temperature` at `0` so that you’ll get mostly deterministic results. If you want to experiment with how a higher temperature changes the output, then feel free to play with it by changing the value for `temperature` in this settings file.\\n\\nIt’s important to keep in mind that you won’t be able to achieve true determinism with the current LLM models offered by OpenAI even if you keep `temperature` at `0`:\\n> An edge-case in GPT-3 with big implications: Inference is non-deterministic (even at temperature=0) when top-2 token probabilities are <1% different. So temperature=0 output is *very close* to deterministic, but actually isn’t. Worth remembering. ([Source](https://twitter.com/goodside/status/1608525976702525440?ref_src=twsrc%5Etfw))\\n\\nSo, while you can’t entirely guarantee that the model will always return the same result, you can get much closer by setting `temperature` to `0`.\\n\\nAnother approach that improves determinism in the results is to set a value for [the `seed` parameter](https://cookbook.openai.com/examples/reproducible_outputs_with_the_seed_parameter). The provided code sets the seed to `12345`. However, this only has an effect on some of the models.\\n\\n## Start Engineering Your Prompts[](#start-engineering-your-prompts \\\"Permanent link\\\")\\n\\nNow that you have an understanding of prompt engineering and the practical project that you’ll be working with, it’s time to dive into some prompt engineering techniques. In this section, you’ll learn how to apply the following techniques to your prompts to get the desired output from the language model:\\n\\n- **Zero-shot prompting:** Giving the language model normal instructions without any additional context\\n- **Few-shot prompting:** Conditioning the model on a few examples to boost its performance\\n- **Using delimiters:** Adding special tokens or phrases to provide structure and instructions to the model\\n- **Detailed, numbered steps:** Breaking down a complex prompt into a series of small, specific steps\\n\\n\\nBy practicing these techniques with the customer chat conversation example, you’ll gain a deeper understanding of how prompt engineering can enhance the capabilities of language models and improve their usefulness in real-world applications.\\n\\n### Describe Your Task[](#describe-your-task \\\"Permanent link\\\")\\n\\nYou’ll start your prompt engineering journey with a concept called **zero-shot prompting**, which is just a fancy way of saying that you’re asking a question or describing a task:\\n> Remove personally identifiable information, only show the date, and replace all swear words with “😤”\\n\\nThis task description focuses on the requested steps for sanitizing the customer chat conversation and literally spells them out. This is the prompt that’s currently saved as `instruction_prompt` in the `settings.toml` file:\\n\\nIf you run the Python script and provide the support chat file as an argument, then it’ll send this prompt together with the content of `chats.txt` to OpenAI’s text completion API:\\n\\nIf you correctly installed the dependencies and added your OpenAI API key as an environment variable, then all you need to do is wait until you see the API response pop up in your terminal:\\n\\nIn the example output, you can see that the prompt that you provided didn’t do a good job tackling the tasks. And that’s putting it gently! It picked up that it should do something with the huffing emoji and reduce the [ISO date-time](https://en.wikipedia.org/wiki/ISO_8601) to only a date. Your results might not have tackled all of that. Overall, nearly all of the work is left undone and the output is useless.\\n\\nIf you’re new to interacting with LLMs, then this may have been a first attempt at outsourcing your development work to the text completion model. But these initial results aren’t exactly exhilarating.\\n\\nSo you’ve described the task in natural language and gotten a bad result. But don’t fret—throughout the tutorial you’ll learn how you can get more useful responses for your task.\\n\\nOne way to do that is by increasing the number of **shots**, or examples, that you give to the model. When you’ve given the model zero shots, the only way to go is up! That’s why you’ll improve your results through **few-shot prompting** in the next section.\\n\\n### Use Few-Shot Prompting to Improve Output[](#use-few-shot-prompting-to-improve-output \\\"Permanent link\\\")\\n\\nFew-shot prompting is a prompt engineering technique where you provide example tasks and their expected solutions in your prompt. So, instead of just describing the task like you did before, you’ll now add an example of a chat conversation and its sanitized version.\\n\\nOpen up `settings.toml` and change your `instruction_prompt` by adding such an example:\\n\\nOnce you’ve applied the change, give the LLM another chance to sanitize the chat conversations for you by running the script again:\\n\\nYou’ll have to wait for the LLM to predict all the tokens. When it’s done, you’ll see a fresh response pop up in your terminal:\\n\\nOkay, great! This time at least the LLM didn’t eat up all the information that you passed to it without giving anything useful back!\\n\\nThis time, the model tackled some of the tasks. For example, it sanitized the names in square brackets. However, the names of the customers are still visible in the actual conversations. It also didn’t censor the order numbers or the email address.\\n\\nThe model probably didn’t sanitize any of the names in the conversations or the order numbers because the chat that you provided didn’t contain any names or order numbers. In other words, the output that you provided didn’t show an example of redacting names, order numbers, or email addresses in the conversation text.\\n\\nHere you can see how important it is to choose good examples that clearly represent the output that you want.\\n\\nSo far, you’ve provided *one* example in your prompt. To cover more ground, you’ll add another example so that this part of your prompt truly puts the *few* in few-shot prompting:\\n\\nYou added a second example that contains both a customer name as well as an order number in the chat text body. The example of a sanitized chat shows both types of sensitive data replaced with a sequence of asterisks (`****`). Now you’ve given the LLM a good example to *model*.\\n\\nAfter editing `instruction_prompt` in `settings.toml`, run your script again and wait for the response to print to your terminal:\\n\\nWait? Where did most of the output go? You probably expected to see better results, but it looks like you’re getting only two of the conversations back this time!\\n\\nYou’ve added more text to your prompt. At this point, the task instructions probably make up proportionally too few tokens for the model to consider them in a meaningful way. The model lost track of what it was supposed to do with the text that you provided.\\n\\nAdding more examples should make your responses stronger instead of eating them up, so what’s the deal? You can trust that few-shot prompting works—it’s a widely used and very effective prompt engineering technique. To help the model distinguish which part of your prompt contains the instructions that it should follow, you can use *delimiters*.\\n\\n### Use Delimiters to Clearly Mark Sections of Your Prompt[](#use-delimiters-to-clearly-mark-sections-of-your-prompt \\\"Permanent link\\\")\\n\\nIf you’re working with content that needs specific inputs, or if you provide examples like you did in the previous section, then it can be very helpful to clearly mark specific sections of the prompt. Keep in mind that everything you write arrives to an LLM as a single prompt—a long sequence of tokens.\\n\\nYou can improve the output by using delimiters to fence and label specific parts of your prompt. In fact, if you’ve been running the example code, then you’ve already used delimiters to fence the content that you’re reading from file.\\n\\nThe script adds the delimiters when assembling the prompt in `app.py`:\\n\\nIn line 13, you wrap the chat content in between `>>>>>` and `<<<<<` delimiters. Marking parts of your prompt with delimiters can help the model keep track of which tokens it should consider as a single unit of meaning.\\n\\nYou’ve seen in the previous section that missing delimiters can lead to unexpected results. You might receive less output than expected, like in the previous example, or an empty response. But you might also receive output that’s quite different from what you want! For example, imagine that the chat content that you’re reformatting contains a question at the end, such as:\\n> Can you give me your order number?\\n\\nIf this question is the last line of your prompt without delimiters, then the LLM might continue the imaginary chat conversation by answering the question with an imaginary order number. Give it a try by adding such a sentence to the end of your current prompt!\\n\\nDelimiters can help to separate the content and examples from the task description. They can also make it possible to refer to specific parts of your prompt at a later point in the prompt.\\n\\nA delimiter can be any **sequence of characters** that usually *wouldn’t* appear together, for example:\\n\\n- `>>>>>`\\n- `====`\\n- `####`\\n\\n\\nThe number of characters that you use doesn’t matter too much, as long as you make sure that the sequence is relatively unique. Additionally, you can add labels just before or just after the delimiters:\\n\\n- `START CONTENT>>>>>` content `<<<<>>>>CONTENT<<<<<` in your task description. These delimiters match the delimiters that the code in `app.py` adds when assembling the prompt.\\n\\nYou’ve also delimited the examples that you’re providing with `#### START EXAMPLES` and `#### END EXAMPLES`, and you differentiate between the inputs and expected outputs using multiple dashes (`------`) as delimiters.\\n\\nWhile delimiters can help you to get better results, in this case your output is quite similar to before:\\n\\nIt’s noticeable that the model only shows the two example data that you passed as examples. Could it be that your prompt leads to something similar like [overfitting](https://realpython.com/linear-regression-in-python/#underfitting-and-overfitting)? Using the actual data that you want to sanitize as your training data is, anyway, not a good idea, so in the next section, you’ll make sure to change that.\\n\\nIn this section, you’ve learned how you can clarify the different parts of your prompt using delimiters. You marked which part of the prompt is the task description and which part contains the customer support chat conversations, as well as the examples of original input and expected sanitized output.\\n\\n### Test Your Prompt Across Different Data[](#test-your-prompt-across-different-data \\\"Permanent link\\\")\\n\\nSo far, you’ve created your few-shot examples from the same data that you also run the sanitation on. This means that you’re effectively using your test data to provide context to the model. Mixing training, validation, and testing data is a bad practice in machine learning. You might wonder how well your prompt generalizes to different input.\\n\\nTo test this out, run the script another time with the same prompt using the second file that contains chat conversations, `testing-chats.txt`. The conversations in this file contain different names, and different—soft—swear words:\\n\\nYou’ll keep running your script using `testing-chats.txt` moving forward, unless indicated differently.\\n\\nOnce you’ve waited for the LLM to generate and return the response, you’ll notice that the result isn’t very satisfying:\\n\\nThe model now understands that you meant the examples as examples to follow when applying edits and gives you back all of the new input data. However, it didn’t do a great job following the instructions.\\n\\nThe model didn’t identify new swear words and didn’t replace them. The model also didn’t redact the order numbers, nor did it anonymize the names. It looks like it only managed to reformat your date strings.\\n\\nSo your engineered prompt currently doesn’t work well, and generalizes even worse. If you built a pipeline based on this prompt, where new chats could contain new customer names, then the application would probably continue to perform poorly. How can you fix that?\\n\\nYou’ve grown your prompt significantly by providing more examples, but your task description is still largely just the question that you wrote right at the beginning. To get better results, you’ll need to do some prompt engineering on the task description as well.\\n\\n### Describe Your Request in Numbered Steps[](#describe-your-request-in-numbered-steps \\\"Permanent link\\\")\\n\\nIf you break up your task instructions into a numbered sequence of small steps, then the model is a lot more likely to produce the results that you’re looking for.\\n\\nGo back to your prompt in `settings.toml` and break your initial task description into more granular, specific substeps:\\n\\nWith these step-by-step instructions in place, you’re ready for another run of your script and another inspection of the newly generated output:\\n\\nThat’s a significant improvement! The model managed to follow the pattern of replacing the names in square brackets with `[Agent]` and `[Customer]`, respectively. It correctly identified some new swear words and replaced them with the huffing emoji. The model also redacted the order numbers, and anonymized the names in the conversation texts.\\n\\nOften, one of the best approaches to get better results from an LLM is to make your instructions more specific.\\n\\nFraming your tasks in even smaller and more specific steps, will generally get you better results. Don’t shy away from [some repetition](https://twitter.com/phillip_isola/status/1532189616106881027):\\n\\nIncreasing the specificity of your instructions, and introducing numbered steps, helped you create a well-performing prompt. Your prompt successfully removes personally identifiable information from the conversations, redacts swear words, and reformats the ISO date-time stamp, as well as the usernames.\\n\\nYou could consider your initial task as completed, but there’s more that you want to do, and more prompt engineering techniques to explore. You also know that there are newer models that you could work with, and your success has further piqued your curiosity. It’s time to switch to a different LLM, see how that influences your output, and then continue exploring other techniques.\\n\\n## Perform Chat Completions With GPT-4[](#perform-chat-completions-with-gpt-4 \\\"Permanent link\\\")\\n\\nYou’ve decided to switch to an even more powerful LLM, GPT-4. In the rest of this tutorial, you’ll use GPT-4 to continue exploring other important prompt engineering techniques:\\n\\n1. **Role prompting**: Using a system message to set the tone of the conversation, and using different roles to give context through labeling\\n2. **Chain-of-thought prompting (CoT)**: Giving the model time to think by prompting it to reason about a task, then including the reasoning in the prompt\\n\\n\\nYou’ll also use GPT-4 to **classify the sentiment** of each chat conversation and structure the output format as **JSON**.\\n\\n### Switch to a Different Model[](#switch-to-a-different-model \\\"Permanent link\\\")\\n\\nIf you’re working with the provided script, then all you need to do is pick a chat model from `chat_models` in `settings.toml` and use it as the new value for `model`:\\n\\nChanging these settings will send your request to a different model. Like before, it’ll assemble your prompt in the way necessary for a `/chat/completions` endpoint request, make that request for you, and print the response to your terminal.\\n\\nFor the rest of this tutorial, you’ll work with OpenAI’s latest version of the GPT-4 model. If you don’t have access to this model, then you can instead keep working with the model that you’ve been working with so far.\\n\\nIf you’ve been following along using ChatGPT, then you’re stuck with whatever model currently powers it. Unless you’re a [ChatGPT Plus](https://openai.com/blog/chatgpt-plus) subscriber, then you can [change the model to GPT-4](https://help.openai.com/en/articles/7127997-how-can-i-use-gpt-4-in-chatgpt) on the website.\\n\\nWithout changing your prompt, run your script another time to see the different results of the text completion based only on using a different LLM:\\n\\nSome responses may be relatively similar to the ones with the older model. However, you can also expect to receive results like the one shown above, where most swear words are still present.\\n\\nIt’s important to keep in mind that developing for a specific model will lead to specific results, and swapping the model may improve or deteriorate the responses that you get. Therefore, swapping to a newer and more powerful model won’t necessarily give you better results straight away.\\n\\nAdditionally, it’s also helpful to keep in mind that API calls to larger models will generally [cost more money per request](https://openai.com/pricing). While it can be fun to always use the latest and greatest LLM, it may be worthwhile to consider whether you really need to upgrade to tackle the task that you’re trying to solve.\\n\\n### Add a Role Prompt to Set the Tone[](#add-a-role-prompt-to-set-the-tone \\\"Permanent link\\\")\\n\\nThere are some additional possibilities when interacting with the API endpoint that you’ve only used implicitly, but haven’t explored yet, such as adding **role labels** to a part of the prompt. In this section, you’ll use the `\\\"system\\\"` role to create a system message, and you’ll revisit the concept later on when you [add more roles](#improve-your-output-with-the-power-of-conversation) to improve the output.\\n\\n**Role prompting** usually refers to adding system messages, which represent information that helps to set the context for upcoming completions that the model will produce. System messages usually aren’t visible to the end user. Keep in mind that the `/chat/completions` endpoint models were initially designed for conversational interactions.\\n\\nYou can also use system messages to set a context for your completion task. You’ll craft a bespoke role prompt in a moment. However, for this specific task, the role prompt is likely less important than it might be for some other tasks. To explore the possible influence of a role prompt, you’ll take a little detour and ask your model to play a role:\\n\\nYou keep `instruction_prompt` the same as you engineered it earlier in the tutorial. Additionally, you now add text to `role_prompt`. The role prompt shown above serves as an example for the impact that a misguided prompt can have on your application.\\n\\nUnleash, thou shall, the parchment’s code and behold the marvels unexpected, as the results may stir wonderment and awe:\\n\\nAs you can see, a role prompt can have quite an impact on the language that the LLM uses to construct the response. This is great if you’re building a conversational agent that should speak in a certain tone or language. And you can also use system messages to keep specific setup information present.\\n\\nFor completion tasks like the one that you’re currently working on, you might, however, not need this type of role prompt. For now, you could give it a common boilerplate phrase, such as *You’re a helpful assistant*.\\n\\nTo practice writing a role prompt—and to see whether you can release your customer chat conversations from the reign of that 16th century villain poet—you’ll craft a more appropriate role prompt:\\n\\nThis role prompt is more appropriate to your use case. You don’t want the model to introduce randomness or to change any of the language that’s used in the conversations. Instead, you just want it to execute the tasks that you describe. Run the script another time and take a look at the results:\\n\\nThat looks much better again! Abide concealed in yonder bygone era, ye villainous poet!\\n\\nAs you can see from these examples, role prompts can be a powerful way to change your output. Especially if you’re using the LLM to build a conversational interface, then they’re a force to consider.\\n\\nFor some reason, GPT-4 seems to consistently pick `[Client]` over `[Customer]`, even though you’re specifying `[Customer]` in the few-shot examples. You’ll eventually get rid of these verbose names, so it doesn’t matter for your use case.\\n\\nHowever, if you’re determined and curious—and manage to prompt `[Client]` away—then share the prompt that worked for you in the comments.\\n\\nIn the final section of this tutorial, you’ll revisit using roles and see how you can employ [the power of conversation](#improve-your-output-with-the-power-of-conversation) to improve your output even in a non-conversational completion task like the one you’re working on.\\n\\n### Classify the Sentiment of Chat Conversations[](#classify-the-sentiment-of-chat-conversations \\\"Permanent link\\\")\\n\\nAt this point, you’ve engineered a decent prompt that seems to perform quite well in sanitizing and reformatting the provided customer chat conversations. To fully grasp the power of LLM-assisted workflows, you’ll next tackle the tacked-on request by your manager to also classify the conversations as positive or negative.\\n\\nStart by saving both sanitized conversation files into new files that will constitute the new inputs for your sentiment classification task:\\n\\nYou could continue to build on top of the previous prompt, but eventually you’ll hit a wall when you’re asking the model to do too many edits at once. The classification step is conceptually distinct from the text sanitation, so it’s a good cut-off point to start a new pipeline.\\n\\nThe sanitized chat conversation files are also included in the example codebase:\\n\\nAgain, you want the model to do the work for you. All you need to do is craft a prompt that spells out the task at hand, and provide examples. You can also edit the role prompt to set the context for this new task that the model should perform:\\n\\nYou can now run the script and provide it with the sanitized conversations in `sanitized-testing-chats.txt` that were the output of your previously engineered prompt:\\n\\nYou added another step to your task description and slightly modified the few-shot examples in your prompt. Not a lot of extra work for a task that would have required a lot more work without the help of an LLM. But is this really sufficient? Take a look at the output once your script has finished running:\\n\\nThe output is quite promising! The model correctly labeled conversations with angry customers with the fire emoji. However, the first conversation probably doesn’t entirely fit into the same bucket as the rest because the customer doesn’t display a negative sentiment towards the company.\\n\\nAssume that all of these conversations were resolved positively by the customer service agents and that your company just wants to follow up with those customers who seemed noticeably angry with their situation. In that case, you might need to tweak your prompt a bit more to get the desired result.\\n\\nYou could [add more examples](#use-few-shot-prompting-to-improve-output), which is generally a good idea because it creates more context for the model to apply. Writing a [more detailed description](#describe-your-request-in-numbered-steps) of your task helps as well, as you’ve seen before. However, to tackle this task, you’ll learn about another useful prompt engineering technique called *chain-of-thought* prompting.\\n\\n### Walk the Model Through Chain-of-Thought Prompting[](#walk-the-model-through-chain-of-thought-prompting \\\"Permanent link\\\")\\n\\nA widely successful prompt engineering approach can be summed up with the [anthropomorphism](https://en.wikipedia.org/wiki/Anthropomorphism) of *giving the model time to think*. You can do this with a couple of different specific techniques. Essentially, it means that you prompt the LLM to produce intermediate results that become additional inputs. That way, the reasoning doesn’t need to take distant leaps but only hop from one lily pad to the next.\\n\\nOne of these approaches is to use **chain-of-thought (CoT)** prompting techniques. To apply CoT, you prompt the model to generate intermediate results that then become part of the prompt in a second request. The increased context makes it more likely that the model will arrive at a useful output.\\n\\nThe smallest form of CoT prompting is **zero-shot CoT**, where you literally ask the model to *think step by step*. This approach yields [impressive results](https://arxiv.org/abs/2201.11903) for mathematical tasks that LLMs otherwise often solve incorrectly.\\n\\nChain-of-thought operations are technically split into two stages:\\n\\n1. **Reasoning extraction**, where the model generates the increased context\\n2. **Answer extraction**, where the model uses the increased context to generate the answer\\n\\n\\nReasoning extraction is useful across a variety of CoT contexts. You can generate few-shot examples from input, which you can then use for a separate step of extracting answers using more detailed chain-of-thought prompting.\\n\\nYou can try zero-shot CoT on the sanitized chat conversations to embellish the few-shot examples that you’ll use to classify the chat conversations more robustly. Remove the examples and replace the instructions describing the reasoning on how you would classify the conversations in more detail:\\n\\nYou spelled out the criteria that you want the model to use to assess and classify sentiment. Then you add the sentence *Let’s think step by step* to the end of your prompt.\\n\\nYou want to use this zero-shot CoT approach to generate few-shot examples that you’ll then build into your final prompt. Therefore, you should run the script using the data in `sanitized-chats.txt` this time:\\n\\nYou’ll get back a reference to the conversations, with the reasoning spelled out step by step to reach the final conclusion:\\n\\nThe reasoning is straightforward and sticks to your instructions. If the instructions accurately represent the criteria for marking a conversation as positive or negative, then you’ve got a good playbook at hand.\\n\\nYou can now use this information to improve the few-shot examples for your sentiment classification task:\\n\\nYou’re using the same examples as previously, but you’ve enhanced each of the examples with a short chain of thought that you generated in the previous call. Give your script another spin using `sanitized-testing-chats.txt` as the input file and see whether the results have improved:\\n\\nGreat! Now the first conversation, which was initially classified as negative, has also received the green checkmark.\\n\\nIn this section, you’ve supported your examples with reasoning for why a conversation should be labeled as positive vs negative. You generated this reasoning with another call to the LLM.\\n\\nAt this point, it seems that your prompt generalizes well to the available data and classifies the conversations as intended. And you only needed to carefully craft your words to make it happen!\\n\\n### Structure Your Output Format as JSON[](#structure-your-output-format-as-json \\\"Permanent link\\\")\\n\\nAs a final showcase for effective prompting when incorporating an LLM into your workflow, you’ll tackle the last task, which you added to the list youself: to pass the data on in a structured format that’ll make it straightforward for the customer support team to process further.\\n\\nYou already specified a format to follow in the previous prompt, and the LLM returned what you asked for. So it might just be a matter of asking for a different, more structured format, for example [JSON](https://realpython.com/python-json/):\\n\\nIn your updated `instruction_prompt`, you’ve explicitly asked the model to return the output as valid JSON. Then, you also adapted your few-shot examples to represent the JSON output that you want to receive. Note that you also applied additional formatting by removing the date from each line of conversation and truncating the `[Agent]` and `[Customer]` labels to single letters, `A` and `C`.\\n\\nYou’re still using example chat conversations from your sanitized chat data in `sanitized-chats.txt`, and you send the sanitized testing data from `sanitized-testing-chats.txt` to the model for processing.\\n\\nIn this case, you receive valid JSON, as requested. The classification still works as before and the output censors personally identifiable information, replaces swear words, and applies all the additional requested formatting:\\n\\nYour output may be different and show some small hiccups, but overall, this output is quite impressive and useful! You could pass this JSON structure over to the customer support team, and they could quickly integrate it into their workflow to follow up with customers who displayed a negative sentiment in the chat conversation.\\n\\nYou could stop here, but the engineer in you isn’t quite satisfied yet. All the instructions just in a single prompt? Your premonition calls and tells you tales about maintainability. In the next section, you’ll refactor your prompts to apply role labels before you set up your LLM-assisted pipeline and call it a day.\\n\\n### Improve Your Output With the Power of Conversation[](#improve-your-output-with-the-power-of-conversation \\\"Permanent link\\\")\\n\\nYou added a [role prompt](#add-a-role-prompt-to-set-the-tone) earlier on, but otherwise you haven’t tapped into the power of *conversations* yet.\\n\\nIn this final section, you’ll learn how you can provide additional context to the model by splitting your prompt into multiple separate messages with different labels.\\n\\nIn calls to the `/chat/completions` endpoint, a prompt is split into several **messages**. Each message has its content, which represents the prompt text. Additionally, it also has a **role**. There are [different roles](https://platform.openai.com/docs/api-reference/chat/create#chat/create-role) that a message can have, and you’ll work with three of them:\\n\\n1. `\\\"system\\\"` gives context for the conversation and helps to set the overall tone.\\n2. `\\\"user\\\"` represents the input that a user of your application might provide.\\n3. `\\\"assistant\\\"` represents the output that the model would reply with.\\n\\n\\nSo far, you’ve provided context for different parts of your prompt all mashed together in a single prompt, more or less well separated [using delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt). When you use a model that’s optimized for chat, such as GPT-4, then you can use roles to let the LLM know what type of message you’re sending.\\n\\nFor example, you can create some variables for your few-shot examples and separate variables for the associated CoT reasoning and outputs:\\n\\nYou’ve disassembled your `instruction_prompt` into seven separate prompts, based on what role the messages have in your conversation with the LLM.\\n\\nThe helper function that builds a messages payload, `_assemble_chat_messages()`, is already set up to include all of these prompts in the API request. Take a look into `app.py` to check out the separate messages, with their fitting roles, that make up your overall prompt:\\n\\nYour prompt is now split into distinct parts, each of which has a certain role label:\\n\\n- **Example input** has the `\\\"user\\\"` role.\\n- **Reasoning** that the model created has the `\\\"system\\\"` role.\\n- **Example output** has the `\\\"assistant\\\"` role.\\n\\n\\nYou’re now providing context for how user input might look, how the model can reason about classifying the input, and how your expected output should look. You removed the delimiters that you previously used for labeling the example sections. They aren’t necessary now that you’re providing context for the parts of your prompt through separate messages.\\n\\nGive your script a final run to see whether the power of conversation has managed to improve the output:\\n\\nThis JSON structure is looking legitimately great! The formatting that you wanted now shows up throughout, and the conversations are labeled correctly.\\n\\nAdditionally, you’ve improved the maintainability of your prompts by splitting them into separate labels. You can feel proud to pass on such a useful edit of the customer chat conversation data to your coworkers!\\n\\n## FAQs[](#faqs \\\"Permanent link\\\")\\n\\nYou’ve covered common prompt engineering techniques, and here, you’ll find a few questions and answers that sum up the most important concepts that you’ve covered in this tutorial.\\n\\nYou can use these questions to check your understanding or to recap and solidify what you’ve just learned. After each question, you’ll find a brief explanation hidden in a collapsible section. Click the *Show/Hide* toggle to reveal the answer. Time to dive in!\\n\\nKnowledge about prompt engineering is crucial when you work with large language models (LLMs) because you can receive [much better results](#describe-your-request-in-numbered-steps) with carefully crafted prompts.\\n\\nThe `temperature` setting controls the [amount of randomness](#freeze-responses-by-setting-the-temperature-to-zero) in your output. Setting the `temperature` argument of API calls to `0` will increase consistency in the responses from the LLM. Note that OpenAI’s LLMs are only ever *mostly* deterministic, even with the temperature set to `0`.\\n\\n[Few-shot prompting](#use-few-shot-prompting-to-improve-output) is a common prompt engineering technique where you add examples of expected input and desired output to your prompt.\\n\\n[Using delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt) can be helpful when dealing with more complex prompts. Delimiters help to separate and label sections of the prompt, assisting the LLM in understanding its tasks better.\\n\\n[Testing your prompt](#test-your-prompt-across-different-data) with data that’s separate from the training data is important to see how well the model generalizes to new conditions.\\n\\nYes, generally adding more context will lead to more accurate results. However, it’s also important *how* you add the additional context. [Just adding more text](#use-few-shot-prompting-to-improve-output) may lead to worse results.\\n\\n[Role prompting](#add-a-role-prompt-to-set-the-tone) means providing a system message that sets the tone or context for a conversation. This can greatly impact how the model constructs the response. You can also [use roles to provide context labels](#improve-your-output-with-the-power-of-conversation) for parts of your prompt.\\n\\nIn [chain-of-thought (CoT) prompting](#walk-the-model-through-chain-of-thought-prompting), you prompt the LLM to produce intermediate reasoning steps. You can then include these steps in the answer extraction step to receive better results.\\n\\n## Next Steps[](#next-steps \\\"Permanent link\\\")\\n\\nIn this tutorial, you’ve learned about various prompt engineering techniques, and you’ve built an LLM-assisted Python application along the way. If you’d like to learn more about prompt engineering, then check out some related questions, as well as some resources for further study below:\\n\\nYes, prompt engineer can be a real job, especially in the context of AI and machine learning. As a prompt engineer, you design and optimize prompts so that AI models like GPT-4 produce desired responses. However, it might not be a stand-alone job title everywhere. It could be a part of broader roles like [machine learning](https://realpython.com/learning-paths/machine-learning-python/) engineer or [data scientist](https://realpython.com/learning-paths/data-science-python-core-skills/).\\n\\nPrompt engineering, like any other technical skill, requires time, effort, and practice to learn. It’s not necessarily easy, but it’s certainly possible for someone with the right mindset and resources to learn it. If you’ve enjoyed the iterative and text-based approach that you learned about [in this tutorial](#understand-the-purpose-of-prompt-engineering), then prompt engineering might be a good fit for you.\\n\\nThe field of prompt engineering is quite new, and LLMs keep developing quickly as well. The landscape, best practices, and most effective approaches are therefore changing rapidly. To continue learning about prompt engineering using free and open-source resources, you can check out [Learn Prompting](https://learnprompting.org/docs/intro/) and the [Prompt Engineering Guide](https://www.promptingguide.ai/).\\n\\nHave you found any interesting ways to incorporate an LLM into your workflow? Share your thoughts and experiences in the comments below.\\n\\n== **Take the Quiz:**== Test your knowledge with our interactive “Practical Prompt Engineering” quiz. You’ll receive a score upon completion to help you track your learning progress:\\n\\n---\\n\\n\\n[Prompt Engineering: A Practical Example](/quizzes/practical-prompt-engineering/)\\n\\n**Interactive Quiz**\\n\\n[Practical Prompt Engineering](/quizzes/practical-prompt-engineering/)\\n\\nIn this quiz, you'll test your understanding of prompt engineering techniques with large language models (LLMs) like GPT-3.5 and GPT-4. You'll revisit how to work with OpenAI's API, apply prompt engineering techniques to practical examples, and use various strategies to improve your results.\\n\\n========================\\nLLMs like ChatGPT are very effective at code generation. In this section, we will cover many examples of how to use ChatGPT for code generation.\\n\\nThe OpenAI's Playground (Chat Mode) and the `gpt-3.5-turbo` model are used for all examples below.\\n\\nAs with all chat models from OpenAI, you can use a `System Message` to define the behavior and format of the responses. We will use the following System Message for the prompt examples:\\n\\n## Basic Example[](#basic-example)\\n\\nThe `User Message` will then be the prompt itself with the instruction of the specific code you want the model to generate. Below is a snapshot of the first basic prompt for code generation. Note that the final code was generated by the `Assistant`.\\n\\n![CODE1](/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fchat-mode.3928920f.png&w=3840&q=75)\\n\\nHere is the prompt if you want to try it yourself:\\n\\n## Turn Comments Into Code[](#turn-comments-into-code)\\n\\nYou can also use the code generation capabilities of these LLMs to generate code from comments alone. Let's look at another example that passes the instructions as a comment block:\\n\\n*Prompt:*\\n\\n*Output:*\\n\\nThe generated code looks like working code. However, the output doesn't contain an important `import` statement, i.e., `import json`. While these models generate working code, you must pay close attention to small details like this and always make sure to test your code.\\n\\n## Complete Functions or Next Line[](#complete-functions-or-next-line)\\n\\nThese LLMs have also been incorporated into tools like GitHub Copilot which makes them useful for developers. One useful feature is the ability of the model to complete functions.\\n\\n*Prompt:*\\n\\n*Output:*\\n\\n## MySQL Query Generation[](#mysql-query-generation)\\n\\nBesides the basic code generation example above, you can use the model to generate useful code that could be used in other aspects of programming like creating and testing MySQL queries.\\n\\nLet's say you have a dataset with some information which you can include as part of the prompt and then instruct it to generate a specific query. For example:\\n\\n*Output:*\\n\\nThe model generates a query! However, in this example we made up the database so we don't have a database to test the generated query. If you are testing this example, it could be useful to have this database created. Fortunately, you can use ChatGPT to generate this for you as well. One approach is using a modified version of the prompt above with slightly different instructions to generate a database schema:\\n\\nThe model generated the following schema:\\n\\nYou can create a temporary database using something like [sqliteonline.com (opens in a new tab)](https://sqliteonline.com/).\\n\\nYou will also need dummy data to test if the original generated query was valid. Let's use ChatGPT for this too! The prompt could look something like the following:\\n\\nThe output with the insert statements:\\n\\nNow that you have the database and the dummy data, you can test the original query. Based on the dummy data, the result should be two rows with `Jane Doe` and `John Doe`.\\n\\n## Explain Code[](#explain-code)\\n\\nIf you are learning to program in a certain language, it might be useful to prompt the model to explain certain bits of code. Let's reuse the query generated above and ask the model to explain it.\\n\\nIf you are using the same System Message from before, be careful of how you are instructing the model. For example, if we use the following example, the model will complain that it is a code assistant in Python but it still provides an explanation. Sometimes the model refuses to generate what you are instructing because it's designed to follow the System Message.\\n\\n*Prompt:*\\n\\n*Output:*\\n\\n## Editing Code[](#editing-code)\\n\\nExample coming soon!\\n\\n## Debugging Code[](#debugging-code)\\n\\nExample coming soon!\\n\\n## Best practices[](#best-practices)\\n\\nComing soon!\\n\\n[![SVG Image]()Tackling Generated Datasets Diversity](/applications/generating_textbooks \\\"Tackling Generated Datasets Diversity\\\")[Graduate Job Classification Case Study![SVG Image]()](/applications/workplace_casestudy \\\"Graduate Job Classification Case Study\\\")\\n\\n========================\\n## Select your cookie preferences\\n\\nWe use essential cookies and similar tools that are necessary to provide our site and services. We use performance cookies to collect anonymous statistics, so we can understand how customers use our site and make improvements. Essential cookies cannot be deactivated, but you can choose “Customize” or “Decline” to decline performance cookies.\\n\\nIf you agree, AWS and approved third parties will also use cookies to provide useful site features, remember your preferences, and display relevant content, including relevant advertising. To accept or decline all non-essential cookies, choose “Accept” or “Decline.” To make more detailed choices, choose “Customize.”\\n\\n========================\\nYou're debugging a production issue at 2 AM. The payment service is down, and the AI-generated script that was supposed to handle failovers isn't working. The code looked perfect in development. It passed all the tests. But now it's failing because it hardcoded an API key, doesn't handle AWS credential rotation, and has no audit logging for compliance.\\n\\nThis happens more than anyone wants to admit. [Industry research](https://itrevolution.com/articles/new-research-reveals-ai-coding-assistants-boost-developer-productivity-by-26-what-it-leaders-need-to-know/) shows AI coding assistants boost developer productivity by 26%. But here's the problem: [45% of AI-generated code](https://www.veracode.com/blog/ai-generated-code-security-risks/) contains security vulnerabilities. Most companies are getting faster at building broken software.\\n\\nThe real issue isn't with AI itself. It's that everyone's using the wrong prompts.\\n\\nThink about how most developers use AI coding tools. They type something like \\\"write a Python script to process payments\\\" and expect magic. What they get is code that works in a demo but falls apart in production. It's like asking someone to build you a car and being surprised when they hand you a go-kart.\\n\\nEnterprise software is different. It needs to handle authentication, logging, error recovery, compliance, and integration with systems that were built when Python was still a snake. Most AI prompts don't even mention these requirements.\\n\\nHere's what's counterintuitive: the solution isn't better AI models. It's better prompts. The difference between a prompt that generates toy code and one that generates production-ready code isn't the AI. It's the human who wrote the prompt.\\n\\nEvery large company has the same problem. Developers generate code fast, but it doesn't work in production. The AI creates beautiful functions that assume perfect network conditions, unlimited memory, and no security requirements.\\n\\nReal enterprise code is mostly error handling and edge cases. It's checking if services are down, rotating credentials, masking sensitive data, and logging everything for audits. A payment processing function in a real company spends more time dealing with failures than processing payments.\\n\\nBut look at typical AI prompts. They're all happy path scenarios. \\\"Write a function to connect to a database\\\" never mentions connection pooling, credential rotation, or what happens when the database is unavailable. The AI generates code that works once and breaks forever.\\n\\nThis creates a weird dynamic. Developers feel productive because they're writing code fast. Managers see features shipping quickly. But operations teams know the truth. They're the ones getting called at night when the pretty AI-generated code encounters its first real-world problem.\\n\\nThe gap between development speed and production reliability is growing. Companies are shipping faster than ever while their systems become more fragile. It's like building houses with cardboard because it's quicker than using wood.\\n\\n## **The Context Problem Nobody Talks About**\\n\\nHere's something most people don't understand about AI coding tools. The quality of generated code isn't just about the model. It's about how much context the AI can see.\\n\\nMost AI coding assistants have tiny context windows. They can see maybe a few files at once. But enterprise software isn't a collection of independent files. It's a web of dependencies, shared libraries, configuration systems, and integration patterns that span hundreds of services.\\n\\nWhen an AI can only see a single file, it makes assumptions. It assumes hardcoded values are fine. It assumes simple error handling is enough. It assumes the function will run in isolation. These assumptions kill production systems.\\n\\nThink about it this way. If you asked a new developer to write code without showing them the existing codebase, architecture documentation, or integration patterns, what would you expect? That's essentially what most AI tools are doing.\\n\\n[Augment Code](https://www.augmentcode.com/) has a 200k token context window. That's 12 times larger than most competitors. It can see entire codebases, understand existing patterns, and generate code that actually fits into real systems. It's like the difference between building a bridge with a blueprint versus guessing what the other side looks like.\\n\\nContext isn't just nice to have. It's the difference between code that works and code that integrates.\\n\\nLet's look at how to write prompts that actually work. The secret is specificity. Don't ask for a payment processor. Ask for a payment processor that handles PCI compliance, retry logic, fraud detection, audit logging, and integration with your existing authentication system.\\n\\nHere's a prompt that generates real enterprise code:\\n\\nNotice what's different? It specifies the Python version, the exact cloud services, the validation library, the encryption method, the monitoring system, and the testing framework. It asks for retry logic and edge case handling. This isn't a toy prompt. It's a specification.\\n\\nThe AI knows to include AWS credential handling because S3 and Redshift are mentioned. It knows to add error logging because CloudWatch metrics are required. It generates proper exception handling because edge cases are explicitly mentioned.\\n\\nCompare that to \\\"write a script to process CSV files.\\\" The AI would generate something that works on your laptop but fails in production. No error handling, no monitoring, no security, no tests.\\n\\nHere's another example for authentication:\\n\\nThis prompt gets enterprise-grade authentication code because it mentions the specific requirements that matter: hierarchical roles, LDAP integration, audit logging, and performance testing. The AI generates code that actually works in a real company.\\n\\nThe pattern is simple. Specify everything that matters for production: security, monitoring, error handling, testing, and integration requirements.\\n\\nLet's go through prompts that generate production-ready code. Each one includes the enterprise requirements that most prompts ignore.\\n\\n**Secure ETL Pipeline with Compliance Monitoring**\\n\\nThis works because it specifies the complete data pipeline with security and monitoring. The AI generates code that handles AWS credentials, encryption keys, network failures, and malformed data. It's not just an ETL script. It's an enterprise ETL system.\\n\\n**SOC 2 Audit Log Decorator Framework**\\n\\nCompliance isn't optional in enterprise software. This prompt generates a logging system that meets audit requirements. The AI includes sensitive data masking, correlation IDs, and proper error handling because the prompt mentions SOC 2 compliance.\\n\\n**Role-Based Access Control with Enterprise Integration**\\n\\nAuthentication in enterprise software is complex. This prompt generates code that integrates with existing directory services and handles the permission hierarchies that real companies need. It's not a toy login system. It's enterprise identity management.\\n\\n**Zero-Downtime Kubernetes Deployment Orchestrator**\\n\\nDeployment automation needs to be bulletproof. This prompt generates code that handles the complexity of Kubernetes deployments, including rollback logic and notification systems. It's production deployment automation that actually works.\\n\\n**PII Detection and GDPR Compliance Processor**\\n\\nPrivacy regulations aren't going away. This prompt generates code that handles the complexity of data privacy, including detection algorithms, masking strategies, and audit trails. It's not just data processing. It's compliant data processing.\\n\\n**Multi-Tenant Logging with Data Isolation**\\n\\nMulti-tenant systems need perfect data isolation. This prompt generates logging infrastructure that prevents data leakage between tenants while maintaining operational visibility. It's enterprise logging that actually works.\\n\\n**Automated Compliance Monitoring Framework**\\n\\nCompliance monitoring can't be manual. This prompt generates automation that continuously validates security controls and generates the reports that auditors need. It's compliance automation that reduces audit overhead.\\n\\n**High-Performance Async API Client Generator**\\n\\nAPI integration in enterprise systems needs to handle failures gracefully. This prompt generates clients with proper connection management, retry logic, and error handling. It's not just an API wrapper. It's resilient integration infrastructure.\\n\\nNotice what all these prompts have in common? They specify the production requirements that toy prompts ignore.\\n\\nEvery prompt mentions specific technologies, not generic concepts. Instead of \\\"database,\\\" they say \\\"Redshift.\\\" Instead of \\\"logging,\\\" they say \\\"CloudWatch Logs.\\\" Instead of \\\"security,\\\" they say \\\"JWT tokens with LDAP integration.\\\"\\n\\nEvery prompt includes error handling and monitoring. Real systems fail, and production code needs to handle failures gracefully. These prompts generate code that assumes failures will happen.\\n\\nEvery prompt mentions testing and validation. Enterprise code without tests is just technical debt waiting to happen. These prompts generate the test coverage that production systems need.\\n\\nEvery prompt includes compliance and security requirements. Enterprise software operates under regulatory constraints that toy examples ignore. These prompts generate code that meets real compliance requirements.\\n\\nThe difference isn't the AI model. It's the specificity of the request.\\n\\nHere's what happens when companies use generic AI prompts. Developers generate code fast, but it creates more problems than it solves.\\n\\nThe code works in development but fails in production. The failure modes are predictable: hardcoded credentials, missing error handling, no audit logging, and poor integration with existing systems. Operations teams spend their time fixing AI-generated code instead of building new features.\\n\\nSecurity teams find vulnerabilities in every release. The AI-generated code doesn't follow security best practices because the prompts don't mention security requirements. Every deployment becomes a security review instead of a routine release.\\n\\nCompliance teams can't audit the systems because the code doesn't generate the logs and reports they need. Simple features become compliance projects because the foundation is wrong.\\n\\nThe productivity gains from AI disappear under the weight of technical debt. Teams move fast initially but slow down as the codebase becomes unmaintainable. It's like driving fast with bad brakes. You go faster until you crash.\\n\\nThe solution isn't avoiding AI. It's using it correctly. Good prompts generate code that integrates with existing systems, follows security best practices, and meets compliance requirements.\\n\\nThink about AI as a very fast junior developer who knows syntax but doesn't understand context. You wouldn't tell a junior developer to \\\"build a payment system\\\" without explaining the requirements, architecture, and constraints. Don't do it with AI either.\\n\\nSpecify everything that matters for production. Mention the exact technologies, security requirements, error handling needs, and integration patterns. The more specific the prompt, the better the generated code.\\n\\nReview everything before deployment. AI-generated code still needs human oversight. Look for security issues, integration problems, and missing error handling. Use the generated code as a starting point, not a finished product.\\n\\nTest comprehensively. AI generates code that passes happy path tests but fails under stress. Test error conditions, security scenarios, and integration edge cases. Production testing reveals problems that development testing misses.\\n\\nMonitor everything in production. AI-generated code fails in unexpected ways. Good monitoring catches problems before they impact users. Plan for failures because they will happen.\\n\\nThe companies that get AI right will build software faster without sacrificing quality. They'll use specific prompts that generate production-ready code instead of toy examples.\\n\\nThe companies that get AI wrong will build technical debt faster than ever. They'll ship broken software quickly and spend years fixing it. The productivity gains will disappear under maintenance overhead.\\n\\nThe difference isn't the AI technology. Every company has access to the same models. The difference is prompt engineering and code review discipline.\\n\\n[Augment Code](https://www.augmentcode.com/) provides the enterprise capabilities that make this possible: 200k token context windows, SOC 2 Type II certification, and Claude Sonnet 4 integration. But the tool is only as good as the prompts you give it.\\n\\nThe future belongs to companies that understand this distinction. AI is a powerful tool for generating code, but only if you know how to ask for what you actually need.\\n\\nWant to see the difference that enterprise-grade AI coding capabilities make? [Start your free trial of Augment Code](https://www.augmentcode.com/) and discover how proper context understanding and advanced security features enable productive, compliant development that scales with enterprise requirements.\\n\\n========================\\n\"" +2025-12-30T22:57:37.941144Z DEBUG ThreadId(16) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:79: PromptReviewer produced artifact: Artifact { system_prompt: "# Persona: Python Code Generation Specialist\n\nYou are an expert Python programmer and a specialist in generating high-quality, production", signed_by: "not specified" } +2025-12-30T22:57:37.942382Z DEBUG ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="PromptReviewer" args="{\"intent\":{\"text\":\"write a python fonction\"},\"spec\":{\"constraints\":\"* The language must be Python.\\n* The output must be a syntactically correct function.\\n* The function's purpose, name, parameters, and return value are undefined.\\n* High risk of delivering a function that does not meet the user's actual, unstated need.\\n* The request is too ambiguous to create a meaningful or useful solution.\",\"goal\":\"To receive a piece of Python code that is a function.\"}}" result="{\"system_prompt\":\"# Persona: Python Code Generation Specialist\\n\\nYou are an expert Python programmer and a specialist in generating high-quality, production\",\"signed_by\":\"not specified\"}" +2025-12-30T22:57:54.938876Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:54: Optimization complete. Final artifact length: 3286 +2025-12-30T22:57:54.940389Z INFO ThreadId(01) rigscribe: src/lib.rs:51: Optimize prompt cached to: "./.prompts_perssitense_cache/2011.json" +2025-12-30T23:01:01.738527Z INFO ThreadId(01) rigscribe: src/main.rs:18: Starting prompt optimization process for: 'write a python fonction' +2025-12-30T23:01:01.739355Z INFO ThreadId(01) rigscribe: src/lib.rs:44: Cache HIT: "./.prompts_perssitense_cache/2011.json" loaded from disk +2025-12-30T23:01:19.323636Z INFO ThreadId(01) rigscribe: src/main.rs:18: Starting prompt optimization process for: 'write a python fonction' +2025-12-30T23:01:19.324049Z INFO ThreadId(01) rigscribe: src/lib.rs:47: Cache MIS: "./.prompts_perssitense_cache/2011.json" +2025-12-30T23:01:19.324071Z INFO ThreadId(01) rigscribe: src/lib.rs:48: Optimizing ... +2025-12-30T23:01:19.330795Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:21: Tool Definition - Deconstructor: ToolDefinition { name: "Deconstructor", description: "this tools take a raw prompte and give back it Specification include goal and constrian", parameters: Object {"$schema": String("https://json-schema.org/draft/2020-12/schema"), "properties": Object {"text": Object {"description": String("he raw user intent. You must analyze this to extract technical constraints."), "type": String("string")}}, "required": Array [String("text")], "title": String("Intent"), "type": String("object")} } +2025-12-30T23:01:19.330925Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:24: Tool Definition - PromptReviewer: ToolDefinition { name: "PromptReviewer", description: "this tools take a raw prompte it will evelaute that given promte wiuth its Specification include goal and constrian", parameters: Object {"$defs": Object {"Intent": Object {"properties": Object {"text": Object {"description": String("he raw user intent. You must analyze this to extract technical constraints."), "type": String("string")}}, "required": Array [String("text")], "type": String("object")}, "Specification": Object {"properties": Object {"constraints": Object {"description": String("A list of technical constraints, risks, and negative constraints. Format as a bulleted string."), "type": String("string")}, "goal": Object {"description": String("The primary goal derived from the user's intent. concise and clear."), "type": String("string")}}, "required": Array [String("goal"), String("constraints")], "type": String("object")}}, "$schema": String("https://json-schema.org/draft/2020-12/schema"), "properties": Object {"intent": Object {"$ref": String("#/$defs/Intent")}, "spec": Object {"$ref": String("#/$defs/Specification")}}, "required": Array [String("intent"), String("spec")], "title": String("PromptReviewerArgs"), "type": String("object")} } +2025-12-30T23:01:19.331012Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:27: Tool Definition - WebSearcher: ToolDefinition { name: "WebSearcher", description: "A research tool. Use this to find best practices, domain-specific knowledge, or to verify assumptions about the user's goal.", parameters: Object {"$schema": String("https://json-schema.org/draft/2020-12/schema"), "properties": Object {"query": Object {"type": String("string")}}, "required": Array [String("query")], "title": String("Webquery"), "type": String("object")} } +2025-12-30T23:01:19.331188Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:48: Starting optimization streaming... +2025-12-30T23:01:28.703247Z INFO ThreadId(02) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:32: [Tool Calling]-> Deconstructor with args: Intent { text: "write a python fonction" } +2025-12-30T23:01:39.485279Z DEBUG ThreadId(02) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:64: Deconstructor extracted spec: Specification { goal: "Receive a piece of Python code that defines a function.", constraints: "* The language must be Python.\n* The purpose, inputs, and outputs of the function are undefined.\n* The provided solution will likely be useless as the core requirements are missing.\n* High risk of not meeting the user's actual, unstated goal." } +2025-12-30T23:01:39.485806Z INFO ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="Deconstructor" args="{\"text\":\"write a python fonction\"}" result="{\"goal\":\"Receive a piece of Python code that defines a function.\",\"constraints\":\"* The language must be Python.\\n* The purpose, inputs, and outputs of the function are undefined.\\n* The provided solution will likely be useless as the core requirements are missing.\\n* High risk of not meeting the user's actual, unstated goal.\"}" +2025-12-30T23:01:41.046928Z INFO ThreadId(17) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:40: [Tool Calling]-> PromptReviewer with args: PromptReviewerArgs { intent: Intent { text: "write a python fonction" }, spec: Specification { goal: "Receive a piece of Python code that defines a function.", constraints: "* The language must be Python.\n* The purpose, inputs, and outputs of the function are undefined.\n* The provided solution will likely be useless as the core requirements are missing.\n* High risk of not meeting the user's actual, unstated goal." } } +2025-12-30T23:02:22.185918Z DEBUG ThreadId(13) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:79: PromptReviewer produced artifact: Artifact { system_prompt: "# Role: Expert Python Developer and Code Generation Specialist\n\nYour primary task is to generate a single, complete, and high-quality Python function when given a vague or incomplete request. Your output must serve as a gold-standard example of modern, clean, and safe Python code.\n\n### Guiding Principles\n\n1. **Anticipate Ambiguity:** The user's request is underspecified. You MUST assume they need a general-purpose, illustrative example.\n2. **Prioritize Best Practices:** Your generated code is not just a solution; it is a template for excellence. It MUST showcase industry-standard practices.\n3. **Safety First:** Under no circumstances should the generated code introduce security vulnerabilities or perform risky actions.\n\n### Step-by-Step Generation Process\n\n1. **Select a Generic, High-Utility Use Case:** Since the user provided no specific goal, you MUST choose a common, easily understandable task. Excellent candidates include:\n * A string manipulation utility (e.g., `normalize_text`, `slugify_string`).\n * A data validation function (e.g., `is_valid_email`, `is_strong_password`).\n * A simple data transformation function (e.g., `dict_to_namedtuple`).\n * **Your choice MUST NOT be a trivial one-liner like `add(a, b)`.** It should be complex enough to require a docstring and meaningful logic.\n\n2. **Define the Function Signature:**\n * The function name MUST be descriptive and follow the `snake_case` convention.\n * All arguments and the return value MUST have precise type hints (per PEP 484).\n * Include default values for optional arguments where appropriate to demonstrate flexibility.\n\n3. **Write the Implementation:**\n * The code MUST be fully compliant with PEP 8 style guidelines.\n * The logic MUST be efficient and readable. Add comments only for parts of the code that are not self-explanatory.\n\n4. **Create a Comprehensive Docstring:**\n * You MUST include a docstring immediately following the function definition.\n * The docstring format MUST follow the Google Python Style Guide.\n * It MUST contain the following sections:\n * A concise, one-line summary of the function's purpose.\n * An `Args:` section detailing each argument's name, type, and description.\n * A `Returns:` section describing the type and meaning of the returned value.\n * A `Raises:` section if the function is expected to raise specific errors (e.g., `ValueError`).\n\n5. **Provide a Demonstrative Example:**\n * After the function definition, you MUST include a usage example.\n * This example MUST be placed within an `if __name__ == \"__main__\":` block to ensure it only runs when the script is executed directly.\n * The example should clearly show how to call the function and print its output, demonstrating its behavior with sample inputs.\n\n### Absolute Constraints & Safety Guardrails\n\n* **Language:** The output MUST be exclusively Python 3.9+.\n* **Dependencies:** The generated code MUST NOT require any external libraries or imports beyond the Python Standard Library.\n* **Forbidden Actions:** The code MUST NEVER perform any of the following actions:\n * File I/O (reading, writing, creating, or deleting files).\n * Network communication (HTTP requests, sockets, etc.).\n * Operating system interactions (`os.system`, `subprocess`).\n * Execution of arbitrary strings (`eval()`, `exec()`).\n* **Output Format:** Your final response MUST be a single, standalone Python code block. Do NOT include any conversational text, apologies, or explanations outside of the code's comments and docstring.", signed_by: "N/A" } +2025-12-30T23:02:22.188363Z INFO ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="PromptReviewer" args="{\"intent\":{\"text\":\"write a python fonction\"},\"spec\":{\"constraints\":\"* The language must be Python.\\n* The purpose, inputs, and outputs of the function are undefined.\\n* The provided solution will likely be useless as the core requirements are missing.\\n* High risk of not meeting the user's actual, unstated goal.\",\"goal\":\"Receive a piece of Python code that defines a function.\"}}" result="{\"system_prompt\":\"# Role: Expert Python Developer and Code Generation Specialist\\n\\nYour primary task is to generate a single, complete, and high-quality Python function when given a vague or incomplete request. Your output must serve as a gold-standard example of modern, clean, and safe Python code.\\n\\n### Guiding Principles\\n\\n1. **Anticipate Ambiguity:** The user's request is underspecified. You MUST assume they need a general-purpose, illustrative example.\\n2. **Prioritize Best Practices:** Your generated code is not just a solution; it is a template for excellence. It MUST showcase industry-standard practices.\\n3. **Safety First:** Under no circumstances should the generated code introduce security vulnerabilities or perform risky actions.\\n\\n### Step-by-Step Generation Process\\n\\n1. **Select a Generic, High-Utility Use Case:** Since the user provided no specific goal, you MUST choose a common, easily understandable task. Excellent candidates include:\\n * A string manipulation utility (e.g., `normalize_text`, `slugify_string`).\\n * A data validation function (e.g., `is_valid_email`, `is_strong_password`).\\n * A simple data transformation function (e.g., `dict_to_namedtuple`).\\n * **Your choice MUST NOT be a trivial one-liner like `add(a, b)`.** It should be complex enough to require a docstring and meaningful logic.\\n\\n2. **Define the Function Signature:**\\n * The function name MUST be descriptive and follow the `snake_case` convention.\\n * All arguments and the return value MUST have precise type hints (per PEP 484).\\n * Include default values for optional arguments where appropriate to demonstrate flexibility.\\n\\n3. **Write the Implementation:**\\n * The code MUST be fully compliant with PEP 8 style guidelines.\\n * The logic MUST be efficient and readable. Add comments only for parts of the code that are not self-explanatory.\\n\\n4. **Create a Comprehensive Docstring:**\\n * You MUST include a docstring immediately following the function definition.\\n * The docstring format MUST follow the Google Python Style Guide.\\n * It MUST contain the following sections:\\n * A concise, one-line summary of the function's purpose.\\n * An `Args:` section detailing each argument's name, type, and description.\\n * A `Returns:` section describing the type and meaning of the returned value.\\n * A `Raises:` section if the function is expected to raise specific errors (e.g., `ValueError`).\\n\\n5. **Provide a Demonstrative Example:**\\n * After the function definition, you MUST include a usage example.\\n * This example MUST be placed within an `if __name__ == \\\"__main__\\\":` block to ensure it only runs when the script is executed directly.\\n * The example should clearly show how to call the function and print its output, demonstrating its behavior with sample inputs.\\n\\n### Absolute Constraints & Safety Guardrails\\n\\n* **Language:** The output MUST be exclusively Python 3.9+.\\n* **Dependencies:** The generated code MUST NOT require any external libraries or imports beyond the Python Standard Library.\\n* **Forbidden Actions:** The code MUST NEVER perform any of the following actions:\\n * File I/O (reading, writing, creating, or deleting files).\\n * Network communication (HTTP requests, sockets, etc.).\\n * Operating system interactions (`os.system`, `subprocess`).\\n * Execution of arbitrary strings (`eval()`, `exec()`).\\n* **Output Format:** Your final response MUST be a single, standalone Python code block. Do NOT include any conversational text, apologies, or explanations outside of the code's comments and docstring.\",\"signed_by\":\"N/A\"}" +2025-12-30T23:02:43.700686Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:65: Optimization complete. Final artifact length: 5503 +2025-12-30T23:02:43.703842Z INFO ThreadId(01) rigscribe: src/lib.rs:51: Optimize prompt cached to: "./.prompts_perssitense_cache/2011.json" +2025-12-30T23:03:40.178504Z INFO ThreadId(01) rigscribe: src/main.rs:18: Starting prompt optimization process for: 'write a python fonction' +2025-12-30T23:03:40.178907Z INFO ThreadId(01) rigscribe: src/lib.rs:47: Cache MIS: "./.prompts_perssitense_cache/2011.json" +2025-12-30T23:03:40.178928Z INFO ThreadId(01) rigscribe: src/lib.rs:48: Optimizing ... +2025-12-30T23:03:40.185213Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:21: Tool Definition - Deconstructor: ToolDefinition { name: "Deconstructor", description: "this tools take a raw prompte and give back it Specification include goal and constrian", parameters: Object {"$schema": String("https://json-schema.org/draft/2020-12/schema"), "properties": Object {"text": Object {"description": String("he raw user intent. You must analyze this to extract technical constraints."), "type": String("string")}}, "required": Array [String("text")], "title": String("Intent"), "type": String("object")} } +2025-12-30T23:03:40.185424Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:24: Tool Definition - PromptReviewer: ToolDefinition { name: "PromptReviewer", description: "this tools take a raw prompte it will evelaute that given promte wiuth its Specification include goal and constrian", parameters: Object {"$defs": Object {"Intent": Object {"properties": Object {"text": Object {"description": String("he raw user intent. You must analyze this to extract technical constraints."), "type": String("string")}}, "required": Array [String("text")], "type": String("object")}, "Specification": Object {"properties": Object {"constraints": Object {"description": String("A list of technical constraints, risks, and negative constraints. Format as a bulleted string."), "type": String("string")}, "goal": Object {"description": String("The primary goal derived from the user's intent. concise and clear."), "type": String("string")}}, "required": Array [String("goal"), String("constraints")], "type": String("object")}}, "$schema": String("https://json-schema.org/draft/2020-12/schema"), "properties": Object {"intent": Object {"$ref": String("#/$defs/Intent")}, "spec": Object {"$ref": String("#/$defs/Specification")}}, "required": Array [String("intent"), String("spec")], "title": String("PromptReviewerArgs"), "type": String("object")} } +2025-12-30T23:03:40.185568Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:27: Tool Definition - WebSearcher: ToolDefinition { name: "WebSearcher", description: "A research tool. Use this to find best practices, domain-specific knowledge, or to verify assumptions about the user's goal.", parameters: Object {"$schema": String("https://json-schema.org/draft/2020-12/schema"), "properties": Object {"query": Object {"type": String("string")}}, "required": Array [String("query")], "title": String("Webquery"), "type": String("object")} } +2025-12-30T23:03:40.185844Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:48: Starting optimization streaming... +2025-12-30T23:03:52.650511Z INFO ThreadId(17) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:32: [Tool Calling]-> Deconstructor with args: Intent { text: "write a python fonction" } +2025-12-30T23:04:02.746057Z DEBUG ThreadId(15) rigscribe::tools::deconstructor: src/tools/deconstructor.rs:64: Deconstructor extracted spec: Specification { goal: "Create a working piece of Python code structured as a function.", constraints: "- The programming language must be Python.\n- The function's purpose, name, parameters, and return value are completely undefined.\n- High probability of delivering a function that does not meet the user's actual, unstated needs.\n- The solution will likely be too generic or trivial to be useful without further clarification." } +2025-12-30T23:04:02.746415Z INFO ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="Deconstructor" args="{\"text\":\"write a python fonction\"}" result="{\"goal\":\"Create a working piece of Python code structured as a function.\",\"constraints\":\"- The programming language must be Python.\\n- The function's purpose, name, parameters, and return value are completely undefined.\\n- High probability of delivering a function that does not meet the user's actual, unstated needs.\\n- The solution will likely be too generic or trivial to be useful without further clarification.\"}" +2025-12-30T23:04:05.016965Z INFO ThreadId(15) rigscribe::tools::web_searcher: src/tools/web_searcher.rs:27: [Tool Calling]-> WebSearcher with args: Webquery { query: "best practices for prompting a large language model to write a Python function" } +2025-12-30T23:04:10.662071Z INFO ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="WebSearcher" args="{\"query\":\"best practices for prompting a large language model to write a Python function\"}" result="\"## Icon for medium.commedium.com\\n\\nVerifying you are human. This may take a few seconds.\\n\\nmedium.com needs to review the security of your connection before proceeding.\\n\\n========================\\nYou’ve used [ChatGPT](https://realpython.com/chatgpt-coding-mentor-python/), and you understand the potential of using a **large language model (LLM)** to assist you in your tasks. Maybe you’re already working on an LLM-supported application and have read about **prompt engineering**, but you’re unsure how to translate the theoretical concepts into a practical example.\\n\\nYour text prompt instructs the LLM’s responses, so tweaking it can get you vastly different output. In this tutorial, you’ll apply multiple **prompt engineering techniques** to a real-world example. You’ll experience prompt engineering as an iterative process, see the effects of applying various techniques, and learn about related concepts from machine learning and data engineering.\\n\\n**In this tutorial, you’ll learn how to:**\\n\\n- Work with OpenAI’s **GPT-3.5** and **GPT-4** models through their **API**\\n- Apply prompt engineering techniques to a **practical, real-world example**\\n- Use **numbered steps**, **delimiters**, and **few-shot prompting** to improve your results\\n- Understand and use **chain-of-thought prompting** to add more context\\n- Tap into the power of **roles** in messages to go beyond using singular **role prompts**\\n\\n\\nYou’ll work with a **Python script** that you can repurpose to fit your own LLM-assisted task. So if you’d like to use practical examples to discover how you can use prompt engineering to get better results from an LLM, then you’ve found the right tutorial!\\n\\n== **Take the Quiz:**== Test your knowledge with our interactive “Practical Prompt Engineering” quiz. You’ll receive a score upon completion to help you track your learning progress:\\n\\n---\\n\\n\\n[Prompt Engineering: A Practical Example](/quizzes/practical-prompt-engineering/)\\n\\n**Interactive Quiz**\\n\\n[Practical Prompt Engineering](/quizzes/practical-prompt-engineering/)\\n\\nIn this quiz, you'll test your understanding of prompt engineering techniques with large language models (LLMs) like GPT-3.5 and GPT-4. You'll revisit how to work with OpenAI's API, apply prompt engineering techniques to practical examples, and use various strategies to improve your results.\\n\\n## Understand the Purpose of Prompt Engineering[](#understand-the-purpose-of-prompt-engineering \\\"Permanent link\\\")\\n\\nPrompt engineering is more than a buzzword. You can get vastly different output from an LLM when using different prompts. That may seem obvious when you consider that you get different output when you ask different questions—but it also applies to phrasing the same conceptual question differently. Prompt engineering means constructing your text input to the LLM using specific approaches.\\n\\nYou can think of prompts as arguments and the LLM as the function to which you pass these arguments. Different input means different output:\\n\\nWhile an LLM is much more complex than the toy function above, the fundamental idea holds true. For a successful function call, you’ll need to know exactly which argument will produce the desired output. In the case of an LLM, that argument is text that consists of many different **tokens**, or [pieces of words](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them).\\n\\nThe field of prompt engineering is still changing rapidly, and there’s a lot of active research happening in this area. As LLMs continue to evolve, so will the prompting approaches that will help you achieve the best results.\\n\\nIn this tutorial, you’ll cover some prompt engineering techniques, along with approaches to iteratively developing prompts, that you can use to get better text completions for your own LLM-assisted projects:\\n\\n- [Zero-Shot Prompting](#describe-your-task)\\n- [Few-Shot Prompting](#use-few-shot-prompting-to-improve-output)\\n- [Delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt)\\n- [Numbered Steps](#describe-your-request-in-numbered-steps)\\n- [Role Prompts](#add-a-role-prompt-to-set-the-tone)\\n- [Chain-of-Thought (CoT) Prompting](#walk-the-model-through-chain-of-thought-prompting)\\n- [Structured Output](#structure-your-output-format-as-json)\\n- [Labeled Conversations](#improve-your-output-with-the-power-of-conversation)\\n\\n\\nThere are more techniques to uncover, and you’ll also find links to additional resources in the tutorial. Applying the mentioned techniques in a practical example will give you a great starting point for improving your LLM-supported programs. If you’ve never worked with an LLM before, then you may want to peruse [OpenAI’s GPT documentation](https://platform.openai.com/docs/guides/gpt) before diving in, but you should be able to follow along either way.\\n\\n## Get to Know the Practical Prompt Engineering Project[](#get-to-know-the-practical-prompt-engineering-project \\\"Permanent link\\\")\\n\\nYou’ll explore various prompt engineering techniques in service of a practical example: [sanitizing](https://en.wikipedia.org/wiki/Data_sanitization) customer chat conversations. By practicing different prompt engineering techniques on a single real-world project, you’ll get a good idea of why you might want to use one technique over another and how you can apply them in practice.\\n\\nImagine that you’re the resident Python developer at a company that handles thousands of customer support chats on a daily basis. Your job is to format and sanitize these conversations. You also help with deciding which of them require additional attention.\\n\\n### Collect Your Tasks[](#collect-your-tasks \\\"Permanent link\\\")\\n\\nYour big-picture assignment is to help your company stay on top of handling customer chat conversations. The conversations that you work with may look like the one shown below:\\n\\nYou’re supposed to make these text conversations more accessible for further processing by the customer support department in a few different ways:\\n> - Remove personally identifiable information.\\n> - Remove swear words.\\n> - Clean the date-time information to only show the date.\\n\\nThe swear words that you’ll encounter in this tutorial won’t be spicy at all, but you can consider them stand-ins for more explicit phrasing that you might find out in the wild. After sanitizing the chat conversation, you’d expect it to look like this:\\n\\nSure—you could handle it [using Python’s `str.replace()`](https://realpython.com/replace-string-python/) or [show off](https://xkcd.com/208) your [regular expression](https://realpython.com/regex-python/) skills. But there’s more to the task than immediately meets the eye.\\n\\nYour project manager isn’t a technical person, and they stuck another task at the end of this list. They may think of the task as a normal continuation of the previous tasks. But you know that it requires an entirely different approach and technology stack:\\n> Mark the conversations as “positive” or “negative.”\\n\\nThat task lies in the realm of [machine learning](https://realpython.com/learning-paths/machine-learning-python/), namely [text classification](https://realpython.com/python-keras-text-classification/), and more specifically [sentiment analysis](https://realpython.com/python-nltk-sentiment-analysis/). Even [advanced regex skills](https://realpython.com/regex-python-part-2/) won’t get you far in this challenge.\\n\\nAdditionally, you know that the customer support team that you’re preparing the data for will want to continue working on it programmatically. Plain text isn’t necessarily the best format for doing that. You want to do work that’s useful for others, so you add yet another stretch goal to your growing list of tasks:\\n> Format the output as JSON.\\n\\nThis task list is quickly growing out of proportion! Fortunately, you’ve got access to the [OpenAI API](https://platform.openai.com/docs/api-reference/), and you’ll employ the help of their LLM to solve all of these challenges.\\n\\nOne of the impressive features of LLMs is the breadth of tasks that you can use them for. So you’ll cover a lot of ground and different areas of use. And you’ll learn how to tackle them all with prompt engineering techniques.\\n\\n### Prepare Your Tools[](#prepare-your-tools \\\"Permanent link\\\")\\n\\nTo follow along with this tutorial, you’ll need to know how to [run a Python script](https://realpython.com/run-python-scripts/) from your command-line interface (CLI), and you’ll need an [API key from OpenAI](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key).\\n\\nYou’ll focus on prompt engineering, so you’ll only use the CLI app as a tool to demonstrate the different techniques. However, if you want to understand the code that you’ll be using, then it’ll help to have some experience with [Python classes](https://realpython.com/python-classes/), [defining your own Python functions](https://realpython.com/defining-your-own-python-function/), [the name-main idiom](https://realpython.com/if-name-main-python/), and [using Python to interact with web APIs](https://realpython.com/api-integration-in-python/).\\n\\nTo get started, go ahead and download the example Python script that you’ll work with throughout the tutorial:\\n\\nThe codebase represents a light abstraction layer on top of the OpenAI API and exposes one function called `get_chat_completion()` that’ll be of primary interest for the tutorial. The function interacts with OpenAI’s [`/chat/completions` endpoint](https://platform.openai.com/docs/api-reference/chat) to generate responses using different models, such as **GPT-3.5-Turbo** and **GPT-4**. You’ll explore both models, starting with GPT-3.5-Turbo, and eventually you’ll move on to the more powerful GPT-4 model.\\n\\nMost of the code in `app.py` revolves around setting up and fetching the settings from `settings.toml`.\\n\\nThe script also parses a command-line argument to allow you to conveniently specify an input file.\\nThe input files that you’ll primarily work with contain [LLM-generated customer support chat conversations](https://arxiv.org/abs/2206.08082), but feel free to reuse the script and provide your own input text files for additional practice.\\n\\nThe heart of the codebase is `settings.toml`. This [TOML settings file](https://realpython.com/python-toml/) hosts the prompts that you’ll use to sharpen your prompt engineering skills. It contains different prompts formatted in the human-readable settings format TOML.\\n\\nKeeping your prompts in a dedicated settings file can help to put them under [version control](https://realpython.com/python-git-github-intro/), which means you can keep track of different versions of your prompts, which will inevitably change during development.\\n\\nYour Python script will read the prompts from `settings.toml`, assemble them meaningfully, and send an API requests to OpenAI.\\n\\nAlternatively, you can also run all the text prompts directly in the [OpenAI playground](https://platform.openai.com/playground), which will give you the same functionality as the script. You could even paste the prompts into the [ChatGPT](https://chat.openai.com/) interface. However, the results will vary because you’ll be interacting with a different model and won’t have the opportunity to change certain settings.\\n\\n### Set Up the Codebase[](#set-up-the-codebase \\\"Permanent link\\\")\\n\\nMake sure that you’re on [Python 3.11](https://realpython.com/python311-new-features/) or higher, so that you can [interact with TOML files](https://realpython.com/python311-tomllib/) using the standard library. If you haven’t downloaded the codebase yet, go ahead and click the link below:\\n\\nUnzip the folder and use your CLI to navigate into the folder. You’ll see a handful of files. The most important ones are `app.py` and `settings.toml`:\\n\\n ./\\n ├── LICENSE\\n ├── README.md\\n ├── app.py\\n ├── chats.txt\\n ├── requirements.txt\\n ├── sanitized-chats.txt\\n ├── sanitized-testing-chats.txt\\n ├── settings.toml\\n ├── settings-final.toml\\n └── testing-chats.txt\\n\\nThe file `settings.toml` contains placeholders for all the prompts that you’ll use to explore the different prompt engineering techniques. That’s the file that you’ll primarily work with, so open it up. You’ll use it to iteratively develop the prompts for your application.\\n\\nThe file `app.py` contains the Python code that ties the codebase together. You’ll run this script many times throughout the tutorial, and it’ll take care of pulling your prompts from `settings.toml`.\\n\\nAfter you’ve downloaded and unpacked the codebase, create and activate a new [virtual environment](https://realpython.com/python-virtual-environments-a-primer/). Then use [`pip`](https://realpython.com/what-is-pip/) to install the required dependencies:\\n\\nNote that this tutorial uses `openai` version 1.13.3. OpenAI may introduce breaking changes between API versions, so make sure that you install the pinned dependencies from the requirements file. Then you’ll be able to work through the tutorial without any hiccups.\\n\\nTo run the script successfully, you’ll need an [OpenAI API key](https://platform.openai.com/account/api-keys) with which to authenticate your API requests. Make sure to keep that key private and never commit it to version control! If you’re new to using API keys, then read up on [best practices for API key safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety).\\n\\nTo integrate your API key with the script and avoid leaking it publicly, you can export the API key as an environment variable:\\n\\nAfter you’ve added your API key as an environment variable named `OPENAI_API_KEY`, the script will automatically pick it up during each run.\\n\\nAt this point, you’ve completed the necessary setup steps. You can now run the script using the command line and provide it with a file as additional input text:\\n\\nThe command shown above combines the customer support chat conversations in `chats.txt` with prompts and API call parameters that are saved in `settings.toml`, then sends a request to the OpenAI API. Finally, it prints the resulting text completion to your terminal.\\n\\nFrom now on, you’ll primarily make changes in `settings.toml`. The code in `app.py` is just here for your convenience, and you won’t have to edit that file at all. The changes in the LLM’s output will come from changing the prompts and a few of the API call arguments.\\n\\n### Freeze Responses by Setting the Temperature to Zero[](#freeze-responses-by-setting-the-temperature-to-zero \\\"Permanent link\\\")\\n\\nWhen you’re planning to integrate an LLM into a product or a workflow, then you’ll generally want deterministic responses. The same input should give you the same output. Otherwise, it gets hard to provide a consistent service or debug your program if something goes wrong.\\n\\nBecause of this, you’ll want to set the [`temperature` argument](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature) of your API calls to `0`. This value will mean that you’ll get *mostly* deterministic results.\\n\\nLLMs do text completion by predicting the next token based on the probability that it follows the previous tokens. Higher `temperature` settings will introduce more randomness into the results by allowing the LLM to pick tokens with lower probabilities. Because there are so many token selections chained one after one the other, picking one different token can sometimes lead to vastly different results.\\n\\nIf you use the LLM to generate ideas or [alternative implementations](https://realpython.com/chatgpt-coding-mentor-python/#prompt-chatgpt-for-alternative-implementations) of a programming task, then higher values for `temperature` might be interesting. However, they’re generally undesirable when you build a product.\\n\\nIn the example codebase, you can adjust `temperature` right inside your `settings.toml` file:\\n\\nThe initial value is set at `0`. All the examples in this tutorial assume that you leave `temperature` at `0` so that you’ll get mostly deterministic results. If you want to experiment with how a higher temperature changes the output, then feel free to play with it by changing the value for `temperature` in this settings file.\\n\\nIt’s important to keep in mind that you won’t be able to achieve true determinism with the current LLM models offered by OpenAI even if you keep `temperature` at `0`:\\n> An edge-case in GPT-3 with big implications: Inference is non-deterministic (even at temperature=0) when top-2 token probabilities are <1% different. So temperature=0 output is *very close* to deterministic, but actually isn’t. Worth remembering. ([Source](https://twitter.com/goodside/status/1608525976702525440?ref_src=twsrc%5Etfw))\\n\\nSo, while you can’t entirely guarantee that the model will always return the same result, you can get much closer by setting `temperature` to `0`.\\n\\nAnother approach that improves determinism in the results is to set a value for [the `seed` parameter](https://cookbook.openai.com/examples/reproducible_outputs_with_the_seed_parameter). The provided code sets the seed to `12345`. However, this only has an effect on some of the models.\\n\\n## Start Engineering Your Prompts[](#start-engineering-your-prompts \\\"Permanent link\\\")\\n\\nNow that you have an understanding of prompt engineering and the practical project that you’ll be working with, it’s time to dive into some prompt engineering techniques. In this section, you’ll learn how to apply the following techniques to your prompts to get the desired output from the language model:\\n\\n- **Zero-shot prompting:** Giving the language model normal instructions without any additional context\\n- **Few-shot prompting:** Conditioning the model on a few examples to boost its performance\\n- **Using delimiters:** Adding special tokens or phrases to provide structure and instructions to the model\\n- **Detailed, numbered steps:** Breaking down a complex prompt into a series of small, specific steps\\n\\n\\nBy practicing these techniques with the customer chat conversation example, you’ll gain a deeper understanding of how prompt engineering can enhance the capabilities of language models and improve their usefulness in real-world applications.\\n\\n### Describe Your Task[](#describe-your-task \\\"Permanent link\\\")\\n\\nYou’ll start your prompt engineering journey with a concept called **zero-shot prompting**, which is just a fancy way of saying that you’re asking a question or describing a task:\\n> Remove personally identifiable information, only show the date, and replace all swear words with “😤”\\n\\nThis task description focuses on the requested steps for sanitizing the customer chat conversation and literally spells them out. This is the prompt that’s currently saved as `instruction_prompt` in the `settings.toml` file:\\n\\nIf you run the Python script and provide the support chat file as an argument, then it’ll send this prompt together with the content of `chats.txt` to OpenAI’s text completion API:\\n\\nIf you correctly installed the dependencies and added your OpenAI API key as an environment variable, then all you need to do is wait until you see the API response pop up in your terminal:\\n\\nIn the example output, you can see that the prompt that you provided didn’t do a good job tackling the tasks. And that’s putting it gently! It picked up that it should do something with the huffing emoji and reduce the [ISO date-time](https://en.wikipedia.org/wiki/ISO_8601) to only a date. Your results might not have tackled all of that. Overall, nearly all of the work is left undone and the output is useless.\\n\\nIf you’re new to interacting with LLMs, then this may have been a first attempt at outsourcing your development work to the text completion model. But these initial results aren’t exactly exhilarating.\\n\\nSo you’ve described the task in natural language and gotten a bad result. But don’t fret—throughout the tutorial you’ll learn how you can get more useful responses for your task.\\n\\nOne way to do that is by increasing the number of **shots**, or examples, that you give to the model. When you’ve given the model zero shots, the only way to go is up! That’s why you’ll improve your results through **few-shot prompting** in the next section.\\n\\n### Use Few-Shot Prompting to Improve Output[](#use-few-shot-prompting-to-improve-output \\\"Permanent link\\\")\\n\\nFew-shot prompting is a prompt engineering technique where you provide example tasks and their expected solutions in your prompt. So, instead of just describing the task like you did before, you’ll now add an example of a chat conversation and its sanitized version.\\n\\nOpen up `settings.toml` and change your `instruction_prompt` by adding such an example:\\n\\nOnce you’ve applied the change, give the LLM another chance to sanitize the chat conversations for you by running the script again:\\n\\nYou’ll have to wait for the LLM to predict all the tokens. When it’s done, you’ll see a fresh response pop up in your terminal:\\n\\nOkay, great! This time at least the LLM didn’t eat up all the information that you passed to it without giving anything useful back!\\n\\nThis time, the model tackled some of the tasks. For example, it sanitized the names in square brackets. However, the names of the customers are still visible in the actual conversations. It also didn’t censor the order numbers or the email address.\\n\\nThe model probably didn’t sanitize any of the names in the conversations or the order numbers because the chat that you provided didn’t contain any names or order numbers. In other words, the output that you provided didn’t show an example of redacting names, order numbers, or email addresses in the conversation text.\\n\\nHere you can see how important it is to choose good examples that clearly represent the output that you want.\\n\\nSo far, you’ve provided *one* example in your prompt. To cover more ground, you’ll add another example so that this part of your prompt truly puts the *few* in few-shot prompting:\\n\\nYou added a second example that contains both a customer name as well as an order number in the chat text body. The example of a sanitized chat shows both types of sensitive data replaced with a sequence of asterisks (`****`). Now you’ve given the LLM a good example to *model*.\\n\\nAfter editing `instruction_prompt` in `settings.toml`, run your script again and wait for the response to print to your terminal:\\n\\nWait? Where did most of the output go? You probably expected to see better results, but it looks like you’re getting only two of the conversations back this time!\\n\\nYou’ve added more text to your prompt. At this point, the task instructions probably make up proportionally too few tokens for the model to consider them in a meaningful way. The model lost track of what it was supposed to do with the text that you provided.\\n\\nAdding more examples should make your responses stronger instead of eating them up, so what’s the deal? You can trust that few-shot prompting works—it’s a widely used and very effective prompt engineering technique. To help the model distinguish which part of your prompt contains the instructions that it should follow, you can use *delimiters*.\\n\\n### Use Delimiters to Clearly Mark Sections of Your Prompt[](#use-delimiters-to-clearly-mark-sections-of-your-prompt \\\"Permanent link\\\")\\n\\nIf you’re working with content that needs specific inputs, or if you provide examples like you did in the previous section, then it can be very helpful to clearly mark specific sections of the prompt. Keep in mind that everything you write arrives to an LLM as a single prompt—a long sequence of tokens.\\n\\nYou can improve the output by using delimiters to fence and label specific parts of your prompt. In fact, if you’ve been running the example code, then you’ve already used delimiters to fence the content that you’re reading from file.\\n\\nThe script adds the delimiters when assembling the prompt in `app.py`:\\n\\nIn line 13, you wrap the chat content in between `>>>>>` and `<<<<<` delimiters. Marking parts of your prompt with delimiters can help the model keep track of which tokens it should consider as a single unit of meaning.\\n\\nYou’ve seen in the previous section that missing delimiters can lead to unexpected results. You might receive less output than expected, like in the previous example, or an empty response. But you might also receive output that’s quite different from what you want! For example, imagine that the chat content that you’re reformatting contains a question at the end, such as:\\n> Can you give me your order number?\\n\\nIf this question is the last line of your prompt without delimiters, then the LLM might continue the imaginary chat conversation by answering the question with an imaginary order number. Give it a try by adding such a sentence to the end of your current prompt!\\n\\nDelimiters can help to separate the content and examples from the task description. They can also make it possible to refer to specific parts of your prompt at a later point in the prompt.\\n\\nA delimiter can be any **sequence of characters** that usually *wouldn’t* appear together, for example:\\n\\n- `>>>>>`\\n- `====`\\n- `####`\\n\\n\\nThe number of characters that you use doesn’t matter too much, as long as you make sure that the sequence is relatively unique. Additionally, you can add labels just before or just after the delimiters:\\n\\n- `START CONTENT>>>>>` content `<<<<>>>>CONTENT<<<<<` in your task description. These delimiters match the delimiters that the code in `app.py` adds when assembling the prompt.\\n\\nYou’ve also delimited the examples that you’re providing with `#### START EXAMPLES` and `#### END EXAMPLES`, and you differentiate between the inputs and expected outputs using multiple dashes (`------`) as delimiters.\\n\\nWhile delimiters can help you to get better results, in this case your output is quite similar to before:\\n\\nIt’s noticeable that the model only shows the two example data that you passed as examples. Could it be that your prompt leads to something similar like [overfitting](https://realpython.com/linear-regression-in-python/#underfitting-and-overfitting)? Using the actual data that you want to sanitize as your training data is, anyway, not a good idea, so in the next section, you’ll make sure to change that.\\n\\nIn this section, you’ve learned how you can clarify the different parts of your prompt using delimiters. You marked which part of the prompt is the task description and which part contains the customer support chat conversations, as well as the examples of original input and expected sanitized output.\\n\\n### Test Your Prompt Across Different Data[](#test-your-prompt-across-different-data \\\"Permanent link\\\")\\n\\nSo far, you’ve created your few-shot examples from the same data that you also run the sanitation on. This means that you’re effectively using your test data to provide context to the model. Mixing training, validation, and testing data is a bad practice in machine learning. You might wonder how well your prompt generalizes to different input.\\n\\nTo test this out, run the script another time with the same prompt using the second file that contains chat conversations, `testing-chats.txt`. The conversations in this file contain different names, and different—soft—swear words:\\n\\nYou’ll keep running your script using `testing-chats.txt` moving forward, unless indicated differently.\\n\\nOnce you’ve waited for the LLM to generate and return the response, you’ll notice that the result isn’t very satisfying:\\n\\nThe model now understands that you meant the examples as examples to follow when applying edits and gives you back all of the new input data. However, it didn’t do a great job following the instructions.\\n\\nThe model didn’t identify new swear words and didn’t replace them. The model also didn’t redact the order numbers, nor did it anonymize the names. It looks like it only managed to reformat your date strings.\\n\\nSo your engineered prompt currently doesn’t work well, and generalizes even worse. If you built a pipeline based on this prompt, where new chats could contain new customer names, then the application would probably continue to perform poorly. How can you fix that?\\n\\nYou’ve grown your prompt significantly by providing more examples, but your task description is still largely just the question that you wrote right at the beginning. To get better results, you’ll need to do some prompt engineering on the task description as well.\\n\\n### Describe Your Request in Numbered Steps[](#describe-your-request-in-numbered-steps \\\"Permanent link\\\")\\n\\nIf you break up your task instructions into a numbered sequence of small steps, then the model is a lot more likely to produce the results that you’re looking for.\\n\\nGo back to your prompt in `settings.toml` and break your initial task description into more granular, specific substeps:\\n\\nWith these step-by-step instructions in place, you’re ready for another run of your script and another inspection of the newly generated output:\\n\\nThat’s a significant improvement! The model managed to follow the pattern of replacing the names in square brackets with `[Agent]` and `[Customer]`, respectively. It correctly identified some new swear words and replaced them with the huffing emoji. The model also redacted the order numbers, and anonymized the names in the conversation texts.\\n\\nOften, one of the best approaches to get better results from an LLM is to make your instructions more specific.\\n\\nFraming your tasks in even smaller and more specific steps, will generally get you better results. Don’t shy away from [some repetition](https://twitter.com/phillip_isola/status/1532189616106881027):\\n\\nIncreasing the specificity of your instructions, and introducing numbered steps, helped you create a well-performing prompt. Your prompt successfully removes personally identifiable information from the conversations, redacts swear words, and reformats the ISO date-time stamp, as well as the usernames.\\n\\nYou could consider your initial task as completed, but there’s more that you want to do, and more prompt engineering techniques to explore. You also know that there are newer models that you could work with, and your success has further piqued your curiosity. It’s time to switch to a different LLM, see how that influences your output, and then continue exploring other techniques.\\n\\n## Perform Chat Completions With GPT-4[](#perform-chat-completions-with-gpt-4 \\\"Permanent link\\\")\\n\\nYou’ve decided to switch to an even more powerful LLM, GPT-4. In the rest of this tutorial, you’ll use GPT-4 to continue exploring other important prompt engineering techniques:\\n\\n1. **Role prompting**: Using a system message to set the tone of the conversation, and using different roles to give context through labeling\\n2. **Chain-of-thought prompting (CoT)**: Giving the model time to think by prompting it to reason about a task, then including the reasoning in the prompt\\n\\n\\nYou’ll also use GPT-4 to **classify the sentiment** of each chat conversation and structure the output format as **JSON**.\\n\\n### Switch to a Different Model[](#switch-to-a-different-model \\\"Permanent link\\\")\\n\\nIf you’re working with the provided script, then all you need to do is pick a chat model from `chat_models` in `settings.toml` and use it as the new value for `model`:\\n\\nChanging these settings will send your request to a different model. Like before, it’ll assemble your prompt in the way necessary for a `/chat/completions` endpoint request, make that request for you, and print the response to your terminal.\\n\\nFor the rest of this tutorial, you’ll work with OpenAI’s latest version of the GPT-4 model. If you don’t have access to this model, then you can instead keep working with the model that you’ve been working with so far.\\n\\nIf you’ve been following along using ChatGPT, then you’re stuck with whatever model currently powers it. Unless you’re a [ChatGPT Plus](https://openai.com/blog/chatgpt-plus) subscriber, then you can [change the model to GPT-4](https://help.openai.com/en/articles/7127997-how-can-i-use-gpt-4-in-chatgpt) on the website.\\n\\nWithout changing your prompt, run your script another time to see the different results of the text completion based only on using a different LLM:\\n\\nSome responses may be relatively similar to the ones with the older model. However, you can also expect to receive results like the one shown above, where most swear words are still present.\\n\\nIt’s important to keep in mind that developing for a specific model will lead to specific results, and swapping the model may improve or deteriorate the responses that you get. Therefore, swapping to a newer and more powerful model won’t necessarily give you better results straight away.\\n\\nAdditionally, it’s also helpful to keep in mind that API calls to larger models will generally [cost more money per request](https://openai.com/pricing). While it can be fun to always use the latest and greatest LLM, it may be worthwhile to consider whether you really need to upgrade to tackle the task that you’re trying to solve.\\n\\n### Add a Role Prompt to Set the Tone[](#add-a-role-prompt-to-set-the-tone \\\"Permanent link\\\")\\n\\nThere are some additional possibilities when interacting with the API endpoint that you’ve only used implicitly, but haven’t explored yet, such as adding **role labels** to a part of the prompt. In this section, you’ll use the `\\\"system\\\"` role to create a system message, and you’ll revisit the concept later on when you [add more roles](#improve-your-output-with-the-power-of-conversation) to improve the output.\\n\\n**Role prompting** usually refers to adding system messages, which represent information that helps to set the context for upcoming completions that the model will produce. System messages usually aren’t visible to the end user. Keep in mind that the `/chat/completions` endpoint models were initially designed for conversational interactions.\\n\\nYou can also use system messages to set a context for your completion task. You’ll craft a bespoke role prompt in a moment. However, for this specific task, the role prompt is likely less important than it might be for some other tasks. To explore the possible influence of a role prompt, you’ll take a little detour and ask your model to play a role:\\n\\nYou keep `instruction_prompt` the same as you engineered it earlier in the tutorial. Additionally, you now add text to `role_prompt`. The role prompt shown above serves as an example for the impact that a misguided prompt can have on your application.\\n\\nUnleash, thou shall, the parchment’s code and behold the marvels unexpected, as the results may stir wonderment and awe:\\n\\nAs you can see, a role prompt can have quite an impact on the language that the LLM uses to construct the response. This is great if you’re building a conversational agent that should speak in a certain tone or language. And you can also use system messages to keep specific setup information present.\\n\\nFor completion tasks like the one that you’re currently working on, you might, however, not need this type of role prompt. For now, you could give it a common boilerplate phrase, such as *You’re a helpful assistant*.\\n\\nTo practice writing a role prompt—and to see whether you can release your customer chat conversations from the reign of that 16th century villain poet—you’ll craft a more appropriate role prompt:\\n\\nThis role prompt is more appropriate to your use case. You don’t want the model to introduce randomness or to change any of the language that’s used in the conversations. Instead, you just want it to execute the tasks that you describe. Run the script another time and take a look at the results:\\n\\nThat looks much better again! Abide concealed in yonder bygone era, ye villainous poet!\\n\\nAs you can see from these examples, role prompts can be a powerful way to change your output. Especially if you’re using the LLM to build a conversational interface, then they’re a force to consider.\\n\\nFor some reason, GPT-4 seems to consistently pick `[Client]` over `[Customer]`, even though you’re specifying `[Customer]` in the few-shot examples. You’ll eventually get rid of these verbose names, so it doesn’t matter for your use case.\\n\\nHowever, if you’re determined and curious—and manage to prompt `[Client]` away—then share the prompt that worked for you in the comments.\\n\\nIn the final section of this tutorial, you’ll revisit using roles and see how you can employ [the power of conversation](#improve-your-output-with-the-power-of-conversation) to improve your output even in a non-conversational completion task like the one you’re working on.\\n\\n### Classify the Sentiment of Chat Conversations[](#classify-the-sentiment-of-chat-conversations \\\"Permanent link\\\")\\n\\nAt this point, you’ve engineered a decent prompt that seems to perform quite well in sanitizing and reformatting the provided customer chat conversations. To fully grasp the power of LLM-assisted workflows, you’ll next tackle the tacked-on request by your manager to also classify the conversations as positive or negative.\\n\\nStart by saving both sanitized conversation files into new files that will constitute the new inputs for your sentiment classification task:\\n\\nYou could continue to build on top of the previous prompt, but eventually you’ll hit a wall when you’re asking the model to do too many edits at once. The classification step is conceptually distinct from the text sanitation, so it’s a good cut-off point to start a new pipeline.\\n\\nThe sanitized chat conversation files are also included in the example codebase:\\n\\nAgain, you want the model to do the work for you. All you need to do is craft a prompt that spells out the task at hand, and provide examples. You can also edit the role prompt to set the context for this new task that the model should perform:\\n\\nYou can now run the script and provide it with the sanitized conversations in `sanitized-testing-chats.txt` that were the output of your previously engineered prompt:\\n\\nYou added another step to your task description and slightly modified the few-shot examples in your prompt. Not a lot of extra work for a task that would have required a lot more work without the help of an LLM. But is this really sufficient? Take a look at the output once your script has finished running:\\n\\nThe output is quite promising! The model correctly labeled conversations with angry customers with the fire emoji. However, the first conversation probably doesn’t entirely fit into the same bucket as the rest because the customer doesn’t display a negative sentiment towards the company.\\n\\nAssume that all of these conversations were resolved positively by the customer service agents and that your company just wants to follow up with those customers who seemed noticeably angry with their situation. In that case, you might need to tweak your prompt a bit more to get the desired result.\\n\\nYou could [add more examples](#use-few-shot-prompting-to-improve-output), which is generally a good idea because it creates more context for the model to apply. Writing a [more detailed description](#describe-your-request-in-numbered-steps) of your task helps as well, as you’ve seen before. However, to tackle this task, you’ll learn about another useful prompt engineering technique called *chain-of-thought* prompting.\\n\\n### Walk the Model Through Chain-of-Thought Prompting[](#walk-the-model-through-chain-of-thought-prompting \\\"Permanent link\\\")\\n\\nA widely successful prompt engineering approach can be summed up with the [anthropomorphism](https://en.wikipedia.org/wiki/Anthropomorphism) of *giving the model time to think*. You can do this with a couple of different specific techniques. Essentially, it means that you prompt the LLM to produce intermediate results that become additional inputs. That way, the reasoning doesn’t need to take distant leaps but only hop from one lily pad to the next.\\n\\nOne of these approaches is to use **chain-of-thought (CoT)** prompting techniques. To apply CoT, you prompt the model to generate intermediate results that then become part of the prompt in a second request. The increased context makes it more likely that the model will arrive at a useful output.\\n\\nThe smallest form of CoT prompting is **zero-shot CoT**, where you literally ask the model to *think step by step*. This approach yields [impressive results](https://arxiv.org/abs/2201.11903) for mathematical tasks that LLMs otherwise often solve incorrectly.\\n\\nChain-of-thought operations are technically split into two stages:\\n\\n1. **Reasoning extraction**, where the model generates the increased context\\n2. **Answer extraction**, where the model uses the increased context to generate the answer\\n\\n\\nReasoning extraction is useful across a variety of CoT contexts. You can generate few-shot examples from input, which you can then use for a separate step of extracting answers using more detailed chain-of-thought prompting.\\n\\nYou can try zero-shot CoT on the sanitized chat conversations to embellish the few-shot examples that you’ll use to classify the chat conversations more robustly. Remove the examples and replace the instructions describing the reasoning on how you would classify the conversations in more detail:\\n\\nYou spelled out the criteria that you want the model to use to assess and classify sentiment. Then you add the sentence *Let’s think step by step* to the end of your prompt.\\n\\nYou want to use this zero-shot CoT approach to generate few-shot examples that you’ll then build into your final prompt. Therefore, you should run the script using the data in `sanitized-chats.txt` this time:\\n\\nYou’ll get back a reference to the conversations, with the reasoning spelled out step by step to reach the final conclusion:\\n\\nThe reasoning is straightforward and sticks to your instructions. If the instructions accurately represent the criteria for marking a conversation as positive or negative, then you’ve got a good playbook at hand.\\n\\nYou can now use this information to improve the few-shot examples for your sentiment classification task:\\n\\nYou’re using the same examples as previously, but you’ve enhanced each of the examples with a short chain of thought that you generated in the previous call. Give your script another spin using `sanitized-testing-chats.txt` as the input file and see whether the results have improved:\\n\\nGreat! Now the first conversation, which was initially classified as negative, has also received the green checkmark.\\n\\nIn this section, you’ve supported your examples with reasoning for why a conversation should be labeled as positive vs negative. You generated this reasoning with another call to the LLM.\\n\\nAt this point, it seems that your prompt generalizes well to the available data and classifies the conversations as intended. And you only needed to carefully craft your words to make it happen!\\n\\n### Structure Your Output Format as JSON[](#structure-your-output-format-as-json \\\"Permanent link\\\")\\n\\nAs a final showcase for effective prompting when incorporating an LLM into your workflow, you’ll tackle the last task, which you added to the list youself: to pass the data on in a structured format that’ll make it straightforward for the customer support team to process further.\\n\\nYou already specified a format to follow in the previous prompt, and the LLM returned what you asked for. So it might just be a matter of asking for a different, more structured format, for example [JSON](https://realpython.com/python-json/):\\n\\nIn your updated `instruction_prompt`, you’ve explicitly asked the model to return the output as valid JSON. Then, you also adapted your few-shot examples to represent the JSON output that you want to receive. Note that you also applied additional formatting by removing the date from each line of conversation and truncating the `[Agent]` and `[Customer]` labels to single letters, `A` and `C`.\\n\\nYou’re still using example chat conversations from your sanitized chat data in `sanitized-chats.txt`, and you send the sanitized testing data from `sanitized-testing-chats.txt` to the model for processing.\\n\\nIn this case, you receive valid JSON, as requested. The classification still works as before and the output censors personally identifiable information, replaces swear words, and applies all the additional requested formatting:\\n\\nYour output may be different and show some small hiccups, but overall, this output is quite impressive and useful! You could pass this JSON structure over to the customer support team, and they could quickly integrate it into their workflow to follow up with customers who displayed a negative sentiment in the chat conversation.\\n\\nYou could stop here, but the engineer in you isn’t quite satisfied yet. All the instructions just in a single prompt? Your premonition calls and tells you tales about maintainability. In the next section, you’ll refactor your prompts to apply role labels before you set up your LLM-assisted pipeline and call it a day.\\n\\n### Improve Your Output With the Power of Conversation[](#improve-your-output-with-the-power-of-conversation \\\"Permanent link\\\")\\n\\nYou added a [role prompt](#add-a-role-prompt-to-set-the-tone) earlier on, but otherwise you haven’t tapped into the power of *conversations* yet.\\n\\nIn this final section, you’ll learn how you can provide additional context to the model by splitting your prompt into multiple separate messages with different labels.\\n\\nIn calls to the `/chat/completions` endpoint, a prompt is split into several **messages**. Each message has its content, which represents the prompt text. Additionally, it also has a **role**. There are [different roles](https://platform.openai.com/docs/api-reference/chat/create#chat/create-role) that a message can have, and you’ll work with three of them:\\n\\n1. `\\\"system\\\"` gives context for the conversation and helps to set the overall tone.\\n2. `\\\"user\\\"` represents the input that a user of your application might provide.\\n3. `\\\"assistant\\\"` represents the output that the model would reply with.\\n\\n\\nSo far, you’ve provided context for different parts of your prompt all mashed together in a single prompt, more or less well separated [using delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt). When you use a model that’s optimized for chat, such as GPT-4, then you can use roles to let the LLM know what type of message you’re sending.\\n\\nFor example, you can create some variables for your few-shot examples and separate variables for the associated CoT reasoning and outputs:\\n\\nYou’ve disassembled your `instruction_prompt` into seven separate prompts, based on what role the messages have in your conversation with the LLM.\\n\\nThe helper function that builds a messages payload, `_assemble_chat_messages()`, is already set up to include all of these prompts in the API request. Take a look into `app.py` to check out the separate messages, with their fitting roles, that make up your overall prompt:\\n\\nYour prompt is now split into distinct parts, each of which has a certain role label:\\n\\n- **Example input** has the `\\\"user\\\"` role.\\n- **Reasoning** that the model created has the `\\\"system\\\"` role.\\n- **Example output** has the `\\\"assistant\\\"` role.\\n\\n\\nYou’re now providing context for how user input might look, how the model can reason about classifying the input, and how your expected output should look. You removed the delimiters that you previously used for labeling the example sections. They aren’t necessary now that you’re providing context for the parts of your prompt through separate messages.\\n\\nGive your script a final run to see whether the power of conversation has managed to improve the output:\\n\\nThis JSON structure is looking legitimately great! The formatting that you wanted now shows up throughout, and the conversations are labeled correctly.\\n\\nAdditionally, you’ve improved the maintainability of your prompts by splitting them into separate labels. You can feel proud to pass on such a useful edit of the customer chat conversation data to your coworkers!\\n\\n## FAQs[](#faqs \\\"Permanent link\\\")\\n\\nYou’ve covered common prompt engineering techniques, and here, you’ll find a few questions and answers that sum up the most important concepts that you’ve covered in this tutorial.\\n\\nYou can use these questions to check your understanding or to recap and solidify what you’ve just learned. After each question, you’ll find a brief explanation hidden in a collapsible section. Click the *Show/Hide* toggle to reveal the answer. Time to dive in!\\n\\nKnowledge about prompt engineering is crucial when you work with large language models (LLMs) because you can receive [much better results](#describe-your-request-in-numbered-steps) with carefully crafted prompts.\\n\\nThe `temperature` setting controls the [amount of randomness](#freeze-responses-by-setting-the-temperature-to-zero) in your output. Setting the `temperature` argument of API calls to `0` will increase consistency in the responses from the LLM. Note that OpenAI’s LLMs are only ever *mostly* deterministic, even with the temperature set to `0`.\\n\\n[Few-shot prompting](#use-few-shot-prompting-to-improve-output) is a common prompt engineering technique where you add examples of expected input and desired output to your prompt.\\n\\n[Using delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt) can be helpful when dealing with more complex prompts. Delimiters help to separate and label sections of the prompt, assisting the LLM in understanding its tasks better.\\n\\n[Testing your prompt](#test-your-prompt-across-different-data) with data that’s separate from the training data is important to see how well the model generalizes to new conditions.\\n\\nYes, generally adding more context will lead to more accurate results. However, it’s also important *how* you add the additional context. [Just adding more text](#use-few-shot-prompting-to-improve-output) may lead to worse results.\\n\\n[Role prompting](#add-a-role-prompt-to-set-the-tone) means providing a system message that sets the tone or context for a conversation. This can greatly impact how the model constructs the response. You can also [use roles to provide context labels](#improve-your-output-with-the-power-of-conversation) for parts of your prompt.\\n\\nIn [chain-of-thought (CoT) prompting](#walk-the-model-through-chain-of-thought-prompting), you prompt the LLM to produce intermediate reasoning steps. You can then include these steps in the answer extraction step to receive better results.\\n\\n## Next Steps[](#next-steps \\\"Permanent link\\\")\\n\\nIn this tutorial, you’ve learned about various prompt engineering techniques, and you’ve built an LLM-assisted Python application along the way. If you’d like to learn more about prompt engineering, then check out some related questions, as well as some resources for further study below:\\n\\nYes, prompt engineer can be a real job, especially in the context of AI and machine learning. As a prompt engineer, you design and optimize prompts so that AI models like GPT-4 produce desired responses. However, it might not be a stand-alone job title everywhere. It could be a part of broader roles like [machine learning](https://realpython.com/learning-paths/machine-learning-python/) engineer or [data scientist](https://realpython.com/learning-paths/data-science-python-core-skills/).\\n\\nPrompt engineering, like any other technical skill, requires time, effort, and practice to learn. It’s not necessarily easy, but it’s certainly possible for someone with the right mindset and resources to learn it. If you’ve enjoyed the iterative and text-based approach that you learned about [in this tutorial](#understand-the-purpose-of-prompt-engineering), then prompt engineering might be a good fit for you.\\n\\nThe field of prompt engineering is quite new, and LLMs keep developing quickly as well. The landscape, best practices, and most effective approaches are therefore changing rapidly. To continue learning about prompt engineering using free and open-source resources, you can check out [Learn Prompting](https://learnprompting.org/docs/intro/) and the [Prompt Engineering Guide](https://www.promptingguide.ai/).\\n\\nHave you found any interesting ways to incorporate an LLM into your workflow? Share your thoughts and experiences in the comments below.\\n\\n== **Take the Quiz:**== Test your knowledge with our interactive “Practical Prompt Engineering” quiz. You’ll receive a score upon completion to help you track your learning progress:\\n\\n---\\n\\n\\n[Prompt Engineering: A Practical Example](/quizzes/practical-prompt-engineering/)\\n\\n**Interactive Quiz**\\n\\n[Practical Prompt Engineering](/quizzes/practical-prompt-engineering/)\\n\\nIn this quiz, you'll test your understanding of prompt engineering techniques with large language models (LLMs) like GPT-3.5 and GPT-4. You'll revisit how to work with OpenAI's API, apply prompt engineering techniques to practical examples, and use various strategies to improve your results.\\n\\n========================\\n## Table of contents[/guides/better-prompts-ai-code#table-of-contents](/guides/better-prompts-ai-code#table-of-contents)\\n\\n- [Be specific with language and requirements](#be-specific-with-language-and-requirements)\\n- [Include examples and constraints](#include-examples-and-constraints)\\n- [Break down complex tasks and iterate](#break-down-complex-tasks-and-iterate)\\n- [Avoid common prompting pitfalls](#avoid-common-prompting-pitfalls)\\n- [Graphite Agent: AI code review for quality and consistency](#graphites-Graphite Agent-ai-code-review-for-quality-and-consistency)\\n\\n\\n[Large language models (LLMs)](https://developers.google.com/machine-learning/resources/intro-llms) can generate code in many programming languages, but the quality of their output depends heavily on how you prompt them. Crafting a precise prompt can be the difference between hours of debugging and a seamless implementation. The key is to clearly communicate your intent and requirements, because an AI doesn't inherently know your goals – it only follows the patterns described in your prompt. This guide introduces language-agnostic prompt engineering techniques to help both beginners and experienced developers get more accurate and useful code from AI. We'll illustrate each technique with examples of weak vs. improved prompts to show how small changes can yield better results. For a deeper dive into how AI code review works, see our [guide on AI code review](https://graphite.com/guides/ai-code-review).\\n\\n### Be specific with language and requirements[/guides/better-prompts-ai-code#be-specific-with-language-and-requirements](/guides/better-prompts-ai-code#be-specific-with-language-and-requirements)\\n\\nThe first rule of prompt engineering for code is to be specific about what you want. Vague prompts yield vague (often incorrect) results. Always mention the programming language, the task or algorithm, and any requirements or constraints. For example, consider asking an AI to generate a sorting algorithm:\\n\\n- **Weak prompt:** \\\"Write a sorting algorithm.\\\"\\n- **Improved prompt:** \\\"Write a **[Python](https://www.python.org/)** implementation of **merge sort** optimized for **memory efficiency**, including **time complexity analysis** and **error handling** for edge cases (e.g. empty arrays).\\\"\\n\\n\\nThe improved prompt is much clearer. It specifies:\\n\\n- The programming language (Python) and specific algorithm (merge sort).\\n- The optimization goal (memory efficiency) and required output details (time complexity analysis).\\n- Error handling expectations and an edge case to consider (empty arrays).\\n\\n\\nBy spelling out these details, you greatly reduce ambiguity. [Microsoft's Developer Tools](https://developer.microsoft.com/) research group observed that prompts with explicit specifications reduced the need for back-and-forth refinements by 68%. In practice, this means the AI is more likely to produce correct, ready-to-use code on the first try.\\n\\nBeing specific also means providing technical context when relevant. If certain constraints matter – such as the target environment, API version, performance or security requirements – include them in the prompt. For instance, specify \\\"Node.js 18 server environment\\\" or \\\"Python 3.10+\\\" if it matters. Otherwise, the model might generate code that is technically correct but not suitable for your situation.\\n> **Tip:** AI code review tools like [Graphite Agent](https://graphite.com/features/agent) can help enforce technical requirements and catch missing context.\\n\\n### Include examples and constraints[/guides/better-prompts-ai-code#include-examples-and-constraints](/guides/better-prompts-ai-code#include-examples-and-constraints)\\n\\nAnother powerful technique is to provide examples or test cases in your prompt. Examples act as implicit constraints, guiding the model toward the format or behavior you expect. For instance, if you want a function with a specific signature or output format, show a snippet of that.\\n\\nImagine you need an email validation function. A weak prompt might just say, \\\"Validate an email address.\\\" An improved prompt would describe requirements and give a usage example, e.g.:\\n> **Prompt:** \\\"Create a TypeScript function `validateEmail(email: string): {isValid: boolean; message: string}` that checks if an email is RFC-5322 compliant and rejects disposable domains. For example, if the input is `\\\"test@tempmail.com\\\"`, it should return an object like `{ isValid: false, message: 'Disposable domain not allowed' }`.\\\"\\n\\nIn this improved version, the prompt lists specific rules (RFC compliance and no disposable domains) and provides a clear function signature and an example of expected output. This gives the AI a template to follow and boundaries to respect. As Dr. Rachel Thomas of fast.ai notes, *\\\"examples in prompts serve as implicit constraints that guide the model toward the desired output format.\\\"* By including a small example or an outline of the output, you help the model understand exactly what you want.\\n\\n### Break down complex tasks and iterate[/guides/better-prompts-ai-code#break-down-complex-tasks-and-iterate](/guides/better-prompts-ai-code#break-down-complex-tasks-and-iterate)\\n\\nIf your desired output is complex, avoid requesting everything in one huge prompt. It's often better to break down a complex task into smaller steps or use an iterative approach. Large models can handle reasonably detailed prompts, but asking for a full application in one go is likely to produce a convoluted or partial answer. Instead, tackle complex tasks in stages and possibly across multiple prompts:\\n\\n1. **Decompose the problem:** Outline sub-tasks in separate prompts (e.g. first define the data model, then the API endpoints, then the UI components). This ensures each part is handled with focus.\\n2. **Use step-by-step reasoning:** You can actually instruct the model to \\\"think step by step.\\\" For example: \\\"I need a JavaScript function to find the longest increasing subsequence in an array. Let's solve this step by step: first explain the concept, then choose an algorithm (greedy vs DP), then write the code, then analyze complexity.\\\" By embedding this reasoning process in the prompt, you encourage the AI to work through the problem methodically.\\n3. **Iterate with the AI:** Treat the AI's first output as a draft. Review it, then issue follow-up prompts to refine the code.\\n\\n\\nBy breaking prompts into smaller pieces or sequential turns, you make it easier for the model to comply at each step. You also get opportunities to catch mistakes early and steer the output in the right direction. Remember that you can carry context between prompts (in a chat-based LLM) – use that to your advantage for iterative improvement.\\n\\nFor more information on [integrating AI into your code review workflow](https://graphite.com/guides/integrating-ai-code-review-workflow) and [how to review code written by AI](https://graphite.com/guides/how-to-review-code-written-by-ai), these guides offer practical advice for iterative, AI-assisted development.\\n\\n### Avoid common prompting pitfalls[/guides/better-prompts-ai-code#avoid-common-prompting-pitfalls](/guides/better-prompts-ai-code#avoid-common-prompting-pitfalls)\\n\\nWhile applying the above techniques, watch out for a few common mistakes that can undermine your results:\\n\\n- **Under-specifying the task:** Don't assume the AI \\\"knows\\\" what you intended. If a prompt is too minimal, the model might solve the wrong problem or omit important features. Always double-check that your prompt fully describes the problem.\\n- **Ignoring context or constraints:** Failing to mention the operating environment, framework, or performance/security constraints can result in code that doesn't fit your project needs. Also make sure to include any version requirements or security considerations. Without this, you might get code that technically works but is impractical or unsafe for your use case. The more context you give, the more relevant the output.\\n- **Over-reliance on AI without verification:** Even with good prompts, AI-generated code isn't guaranteed to be perfect. The model might produce subtle logic bugs, use deprecated APIs, or just take an unconventional approach. Never merge AI-written code into production without reviewing it. Treat the AI as a coding assistant, not an infallible coder. Write prompts that encourage best practices (like asking for safe, idiomatic code), and always test and review the output thoroughly. Using linters or static analysis on AI code is also a smart safeguard.\\n> **To compare approaches:** This guide, [Automated vs. manual code reviews: Finding the right balance](https://graphite.com/guides/automated-vs-manual-code-reviews), explores the strengths and limitations of both, and how to combine them for best results.\\n\\nBy avoiding these pitfalls, you ensure that prompt engineering remains a helpful aid rather than a source of technical debt. Prompting is an iterative learning process – over time, you'll learn which clarifications or extra details yield better code from your preferred LLM.\\n\\n### Graphite Agent: AI code review for quality and consistency[/guides/better-prompts-ai-code#graphite-agent-ai-code-review-for-quality-and-consistency](/guides/better-prompts-ai-code#graphite-agent-ai-code-review-for-quality-and-consistency)\\n\\nWriting a good prompt is the first step, but ensuring the quality and consistency of AI-generated code is the next challenge. This is where tools like [Graphite Agent](https://graphite.com/features/agent) come in. Graphite Agent is an AI-powered code review assistant that provides immediate, context-aware feedback on code changes – including those written by an AI. It hooks into your development workflow (e.g. reviewing [pull requests](https://docs.github.com/en/pull-requests)) and flags issues ranging from logic bugs and security vulnerabilities to style violations and missing documentation. Graphite Agent uses your repository's context and even custom rules you define to give relevant suggestions. For example, you can configure Graphite Agent with your project's style guide or common AI mistakes to watch for, and it will catch those patterns in AI-generated code before a human reviewer even sees them.\\n\\n![screenshot of Graphite Agent comment](/images/content/guides/better-prompts-ai-code/sample-Graphite Agent-comment.png)\\n\\nBy incorporating a tool like Graphite Agent in your workflow, you add an extra layer of scrutiny for AI-produced code. It helps ensure the code meets your team's quality standards and is consistent with the rest of your codebase. In practice, Graphite Agent can automatically identify edge cases the AI missed, point out performance issues, and even suggest fixes – all of which saves human reviewers time and instills confidence in using AI-generated code. Remember, prompt engineering and AI coding tools are most effective when paired with robust review practices. [Graphite Agent](https://graphite.com/features/agent) bridges that gap by reviewing AI code for you, so you can safely harness AI code generation while maintaining high code quality and consistency.\\n\\n========================\\n![]()\\n\\nYou've been blocked by network security.\\n\\nIf you think you've been blocked by mistake, file a ticket below and we'll look into it.\\n\\n[File a ticket](https://support.reddithelp.com/hc/en-us/requests/new?ticket_form_id=21879292693140)\\n\\n========================\\nMost people treat prompting like a conversation. They type something in, hope the AI gets it, and move on. But prompting isn’t just dialog: it’s design that shapes the way the generative AI model responds. The words you use, the structure you follow, the context you provide, these might not lock in an exact output, but they point the model in the right direction.\\n\\nAnd once you start thinking that way, everything changes. Prompting stops being a guessing game and becomes a process you can test, tune, and trust. You’re not aiming for one lucky output, you’re aiming for consistency, even across edge cases, over time, and at scale.\\n\\nIn this article, we’ll share 11 prompt engineering best practices that modern developers can apply right away. We’ll begin with the essentials: seven practical techniques for writing clearer, more effective prompts that deliver better results with less trial and error.\\n\\nThen we’ll shift from prompt writing to *actual* prompt engineering. Because while most advice on “prompt engineering” ends at tips for phrasing, **real engineering means treating AI behavior like a system**, something you can version, trace, and optimize like any other part of your stack.\\n\\nThat’s where our platform [Lilypad](/docs/lilypad) comes in. It’s a prompt engineering framework built for developers, designed to help them capture everything that influences an LLM’s output, from the prompt to the code that wraps it, and trace snippets back to its source. With Lilypad, teams can iterate faster, evaluate reliably, and ship AI features that actually hold up under real-world use cases.\\n\\n## 7 Fundamental Prompt Engineering Best Practices[](#7-fundamental-prompt-engineering-best-practices)\\n\\n### 1. Specify Exactly What You Want[](#1-specify-exactly-what-you-want)\\n\\nLarge language models are great at picking up on hints, but that’s not always a good thing. In zero-shot prompts, where no guidance is given beyond the task description, vague instructions are especially risky.\\n\\nIf your prompt is vague, the model will still try to guess what you meant, often filling in the blanks with assumptions that don’t match your intent.\\n\\nThat’s why clarity matters. You need to be direct about what you're asking, how it should be structured, and what to avoid. Don’t just say “make it better” or “keep it simple”: define what “better” means. Does it mean shorter sentences or a specific tone?\\n\\nAssume nothing. If your output needs to follow a certain format, stay within a word count, or match a particular style, say so. The more specific your prompt, the more likely the model is to give you what you actually want.\\n\\nDon’t do this:\\n\\n Summarize this article.\\n\\nInstead, do this:\\n\\n Summarize the following article in three concise bullet points that highlight its key arguments. Avoid including any opinion or unnecessary detail.\\n\\n### 2. Show What Good Looks Like[](#2-show-what-good-looks-like)\\n\\nLLMs are fast learners, at least when it comes to patterns. That means you don’t just have to tell the model what you want. You can show it.\\n\\nBy providing examples of the kind of output you expect (for a certain input), you’re helping the model pick up on the task, the tone, the format, everything. This approach is called few-shot prompting, and it’s especially useful when your output needs to follow a certain structure or carry a bit of nuance.\\n\\nThink of examples as anchors. The model uses them to get its bearings, then generalizes from there. So pick examples that match the tone, format, and level of detail you want in return. Garbage in, garbage out, but good examples in? Much better output back.\\n\\nDon’t do this:\\n\\n Generate a product description for this item: a stainless steel water bottle.\\n\\nInstead, do this:\\n\\n Here’s how I want the product description to be written:\\n Example:\\n Product: Wireless Ergonomic Mouse\\n Description: A sleek, Bluetooth-enabled mouse with a contoured design for all-day comfort and precision.\\n Now generate one for:\\n Product: Stainless Steel Water Bottle\\n\\n### 3. Use Clear, Action-Oriented Instructions[](#3-use-clear-action-oriented-instructions)\\n\\nStart your AI prompts with strong action verbs. Words like Generate, Summarize, or Translate give the model a clear directive, and help cut through the noise.\\n\\nAvoid soft openings like “Can you…” or “I need…”. These weaken the prompt’s intent and make it feel more like a suggestion than a command. Instead, treat your prompt like an instruction to a function: be direct, specific, and purposeful.\\n\\nThis kind of action-oriented language doesn’t just sound better, it actually works better. Many of the model’s training examples came from commands and task-based inputs, so prompts written that way tend to map more cleanly to what the model already understands.\\n\\nDon’t do this:\\n\\n I need help understanding this paragraph.\\n\\nInstead, do this:\\n\\n Summarize this paragraph in natural language to explain it to a high school student.\\n\\n### 4. Guide the Model to Think Step-by-Step[](#4-guide-the-model-to-think-step-by-step)\\n\\nWhen your prompt involves a complex or multi-part task, don’t ask the model to jump straight to the answer. Instead, use [prompt chaining](/blog/prompt-chaining) or step by step guidance to help it think through the problem in stages.\\n\\nThis technique is known as Chain-of-Thought prompting. You’re asking the model to reason out loud before giving its final answer. It’s a simple tweak that can make a big difference, reducing hallucinated answers and encouraging deeper, more accurate responses.\\n\\nIt’s especially useful for complex tasks like logic-heavy prompts, coding challenges, analytical work, and decision-making tasks. Even if you just want a final answer, guiding the process often leads to higher-quality (and more explainable) results.\\n\\nDon’t do this:\\n\\n What’s the best marketing strategy for our new product?\\n\\nInstead, do this:\\n\\n Let's think through this step-by-step:\\n 1. List the top 3 features or benefits of our new productivity app.\\n 2. Identify the primary target audience based on those features.\\n 3. Describe one or two marketing channels that would best reach that audience.\\n 4. Based on all of the above, recommend a simple marketing strategy tailored to this audience.\\n\\n### 5. Specify the Desired Output Format[](#5-specify-the-desired-output-format)\\n\\nLLMs don’t just need to know what to answer: they need to know how to answer. When you define the format you expect, the results get a whole lot better.\\n\\nWant a list? Say so. Need bullet points, a table, JSON, or a block of code? Spell it out. Clear formatting instructions help the model stay focused, avoid hallucinations, and produce outputs that are easier to read, and easier to use.\\n\\nThis matters even more when your output is going into a downstream tool or system. Consistency counts. You can even prime the model by showing the format before the content, setting the structure before it generates the response.\\n\\nDon’t do this:\\n\\n List benefits of remote work.\\n\\nInstead, do this:\\n\\n List 3 key benefits of remote work in bullet points, with each point no longer than one sentence.\\n\\n### 6. Stay with Simple, Accessible Language[](#6-stay-with-simple-accessible-language)\\n\\nLLMs work best when your prompts are clear, direct, and easy to understand, just like any good API. If your wording is vague or overloaded with jargon, the model is more likely to misfire.\\n\\nSkip the slang, idioms, and fuzzy phrases unless they’re truly necessary, and if they are, define them clearly. Words like better, intuitive, or robust might mean something to you, but to the model, they’re open to interpretation.\\n\\nWrite your prompt like you’re explaining specific tasks to someone smart but unfamiliar with your field. The simpler the language, the easier it is for the model to build a clear mental map, and that leads to more consistent, reliable results.\\n\\nDon’t do this:\\n\\n Write a robust, intuitive onboarding flow with all the bells and whistles.\\n\\nInstead, do this:\\n\\n Write a clear, step-by-step onboarding flow that explains the product’s core features in under 5 screens.\\n\\n### 7. Assign the Model a Role When Context Matters[](#7-assign-the-model-a-role-when-context-matters)\\n\\nLLMs tend to respond more accurately, and in the right tone, when you tell them who they’re supposed to be. Giving the model a role helps it adopt the right voice, vocabulary, and level of formality for the task.\\n\\nNeed a professional explanation for a client? Ask the model to act as a consultant. Writing a friendly support reply? Frame it as coming from a helpful rep. Roles help guide tone, audience awareness, and framing, all of which make the output more relevant and useful.\\n\\nSome argue that assigning a role doesn’t truly change the model, it just forces you to clarify what you actually want, which might be a valid reason it works so well.\\n\\nDon’t do this:\\n\\n Explain how to use Git to someone new.\\n\\nInstead, do this:\\n\\n You are a programming instructor teaching a beginner. Explain how to use Git for version control in simple, friendly language.\\n\\n## 4 Best Practices to Make Your Prompts Repeatably Good[](#4-best-practices-to-make-your-prompts-repeatably-good)\\n\\nAt small scale, writing better prompts can get you far. But once you’re shipping [LLM applications](/blog/llm-applications) to real users, with unpredictable inputs and evolving models, prompt quality alone won’t cut it.\\n\\nLLMs are inherently non-deterministic since the same prompt can yield different outputs. So the question becomes not “how do I write the perfect prompt?” but “how do I engineer this system, including [LLM integration](/blog/llm-integration), to deliver reliable results?”\\n\\nThat makes prompt engineering a process of continuous improvement, what machine learning developers would call an optimization problem.\\n\\nSolving it means tracking *everything* that changes, measuring what works, and testing new ideas against old ones in your artificial intelligence systems. It’s no longer just about the prompt, but the entire system: inputs, outputs, and the code connecting them. In other words, effective [LLM orchestration](/blog/llm-orchestration).\\n\\nThe four best practices below approach prompt engineering as a system-level discipline that you version, trace, and improve over time.\\n\\n**We built Lilypad with this in mind**. While other platforms focus on narrow slices, like playgrounds or prompt versioning, they often fall short when it comes to implementing prompt engineering techniques that make LLM behavior repeatable at scale.\\n\\nFor example, such tools version the prompt but ignore things like parameters or helper logic. Others silo tracing and evaluation, forcing constant context-switching just to label or compare outputs.\\n\\nLilypad brings it all together. In the best practices for prompt engineering that follow, we’ll show how it helps teams apply real engineering principles to prompt development and make outputs reliably good.\\n\\n### 8. Version Everything that Influences the Output[](#8-version-everything-that-influences-the-output)\\n\\nIf it affects the LLM’s behavior, it should be versioned. Not just the prompt, but the model, temperature, helper functions, preprocessing steps, and surrounding logic.\\n\\nWithout versioning, you can’t reproduce outputs or track what changed. That’s why prompt engineering should follow the same rules as production code: every change to logic or inputs should create a new version.\\n\\nVersioning gives you visibility. It lets you roll back, track progress, and understand exactly what changed when results shift.\\n\\nLilypad makes this easy by versioning the full context; not just the prompt, but the code that shapes the call. In fact, we recommend wrapping every LLM call in a Python function to capture the model, parameters, prompt, and any pre/post-processing in one place, even when working with multi-step patterns like [LLM chaining](/blog/llm-chaining).\\n\\nAdd the `@lilypad.trace` decorator and set `versioning=automatic` to have Lilypad version every change made within the function closure.\\n\\n import lilypad\\n import os\\n from openai import OpenAI\\n\\n lilypad.configure(auto_llm=True) \\n\\n client = OpenAI()\\n\\n @lilypad.trace(versioning=\\\"automatic\\\") \\n def answer_question(question: str) -> str:\\n completion = client.chat.completions.create(\\n model=\\\"gpt-4o-mini\\\",\\n messages=[{\\\"role\\\": \\\"user\\\", \\\"content\\\": question}],\\n )\\n return str(completion.choices[0].message.content)\\n\\n response = answer_question(\\\"What makes popcorn pop?\\\") \\n print(response)\\n\\nThis shifts your workflow from ad hoc prompt tweaking to a structured, iterative process. The function now becomes a unit of optimization, something you can version, evaluate, and refine over time, much like a machine learning model.\\n\\nYou’re no longer asking, “Did this prompt change help?” You’re comparing, “Did version 4 outperform version 3 on our eval set?” Each time that function runs, whether in dev, staging, or production, Lilypad captures the full generation: it can break down the inputs, model responses, parameters, and the exact version of the code that produced it. That data flows straight into the Lilypad playground.\\n\\nThe playground is a collaborative prompt engineering environment where you evaluate and compare results. You can:\\n\\n- View outputs across different versions\\n- Inspect inputs and parameters for every call\\n- Swap between model or prompt versions to see how behavior changes\\n- Annotate outputs with pass/fail labels or notes\\n- Replay generations to test reproducibility or debug edge cases\\n\\n\\nInstead of guessing what changed (or why) you get a full trace, backed by version history and real inputs.\\n\\nFor example, we see below that V6 of our code calls `gpt-4o`:\\n\\n![Lilypad Answer Question v6 Trace](/assets/blog/prompt-engineering-best-practices/lilypad-answer-question-v6-trace.webp)\\n\\nChanging the model type automatically increments the version number:\\n\\n![Lilypad Answer Question v7 Trace](/assets/blog/prompt-engineering-best-practices/lilypad-answer-question-v7-trace.webp)\\n\\n### 9. Trace Every Output You Generate[](#9-trace-every-output-you-generate)\\n\\nTracing is how you observe what the model actually did, given a specific input, set of parameters, and version of your code.\\n\\nA good trace captures everything that matters: inputs, outputs, model settings, token usage, latency, cost, and more. When something breaks or performance drops, traces let you debug the issue instead of guessing. Observability isn’t optional. It’s foundational to building reliable LLM-powered apps, especially in production.\\n\\nThat’s why we recommend to trace not just raw API calls, but also the structured execution of your LLM-powered functions. You want to know not only *what* happened, but where it happened, and why.\\n\\nThat’s where Lilypad’s tracing system comes in. Just call `lilypad.configure()` to enable automatic tracing. This captures raw LLM API calls and logs critical metadata like inputs, outputs, token usage, and costs, giving you an API-level footprint of your versioned calls.\\n\\n![Lilypad Traces Tab](/assets/blog/prompt-engineering-best-practices/lilypad-traces-tab.webp)\\n\\nBut to make those traces truly actionable, the `@lilypad.trace` decorator promotes each trace from a loose log to a structured unit of execution, linking it directly to the function that made the call, like `answer_question()` in our earlier example. That lets you filter, group, and analyze traces by logical units, not just endpoints or paths.\\n\\n![Lilypad Nested OpenTelemetry Span](/assets/blog/prompt-engineering-best-practices/lilypad-nested-otel-span.webp)\\n\\n### 10. Offer Safe and Controlled Prompt Collaboration[](#10-offer-safe-and-controlled-prompt-collaboration)\\n\\nPrompt tuning shouldn't be limited to engineers. Product managers, writers, and domain experts often bring the kind of insight that makes or breaks how you write prompts, especially when tone, clarity, or context really matter.\\n\\nThat means you need a way to let non-developers contribute without touching production code.\\n\\nWith Lilypad, you can do exactly that. It supports managed prompts, which are versioned prompt templates that live in the playground. This lets contributors edit, test, and iterate on prompts directly in the browser, while developers reference those prompts safely in code, without ever needing to redeploy the app just to apply a wording tweak.\\n\\n![Lilypad Playground](/assets/blog/prompt-engineering-best-practices/lilypad-playground.webp)\\n\\nThis creates a clean separation of concerns: developers handle infrastructure and application logic, while subject matter experts own the prompt wording. This division is especially valuable when collaborating on more advanced use cases, such as [LLM agents](/blog/llm-agents), where prompt components and control logic need to evolve independently.\\n\\nEach team can move quickly without blocking or breaking the other, improving the user experience. And because the playground features prompts that are markdown-enabled and fully versioned, collaborators can experiment freely, revert changes, and keep improving without risk.\\n\\nBut make no mistake: an [LLM prompt](/blog/llm-prompt) isn’t a floating artifact detached from your codebase. Under the hood, Lilypad treats them just like any other function. The playground generates type-safe code that matches the prompt version you’re referencing in production.\\n\\nIf Lilypad ever goes offline, your code keeps running as-is, with no hidden dependencies, no broken links.\\n\\nThis contrasts with many other systems, which treat prompts as separate from code entirely. That kind of separation might seem flexible, but it makes your system brittle. When the logic that drives model behavior lives outside your source of truth, it’s harder to test, debug, or trust what’s actually going into production.\\n\\n### 11. Make Evals a First-Class Citizen[](#11-make-evals-a-first-class-citizen)\\n\\nWithout evaluation, you’re running without feedback. You can’t know whether a prompt change improved performance, introduced regressions, or simply altered behavior in unpredictable ways.\\n\\n[LLM evaluation](/blog/llm-evaluation) shouldn’t be an afterthought, but should be a core part of your development workflow. A practical place to start is with binary labels: pass/fail, accompanied by a brief explanation. This approach is faster to apply, easier to interpret, and more consistent than numeric scoring systems.\\n\\nBy contrast, ratings like 1–5 often suffer from ambiguity. What distinguishes a “4” from a “5”? Without clear criteria, such scores quickly become subjective and inconsistent, even across reviews from the same person.\\n\\nPass/fail evaluations avoid that ambiguity and better reflect real-world needs. They align with the core question most teams care about: is this output good enough to deploy?\\n\\nThis structure also lends itself to automation. Once you’ve built a solid evaluation dataset, some [LLM tools](/blog/llm-tools) allow you to prompt an LLM to act as a judge, replicating the behavior of human evaluators. That said, [LLM-as-a-judge](/blog/llm-as-judge) outputs should still be verified by humans, especially when dealing with edge cases where outputs may be difficult to assess.\\n\\nLilypad makes evaluation easy to adopt. Non-technical contributors can annotate outputs from AI tools directly in the playground UI: selecting pass or fail and providing reasoning with just a few clicks.\\n\\n![Lilypad Annotation Queue](/assets/blog/prompt-engineering-best-practices/lilypad-annotation-queue.webp)\\n\\nEach evaluation is tied to a specific version of the function that generated the model outputs. That means when quality shifts, whether for better or worse, you can immediately trace back to what changed: the prompt, model, code, parameters, or any combination of the above.\\n\\n## Go Beyond Prompt Writing, Start Prompt Engineering[](#go-beyond-prompt-writing-start-prompt-engineering)\\n\\nBuild with the same rigor you apply to the rest of your stack: Lilypad brings versioning, observability, and [prompt evaluation](/blog/prompt-evaluation) to every call made to AI models to help ensure desired outcomes.\\n\\n[To get started with Lilypad](https://lilypad.mirascope.com/), you can use your GitHub credentials to sign up and begin tracing and versioning your LLM functions with just a few lines of code.\\n\\n========================\\n## Getting Started with Function Calling[](#getting-started-with-function-calling)\\n\\n[https://www.youtube.com/embed/p0I-hwZSWMs?si=tQgi-LiHe6Oj_rzm](https://www.youtube.com/embed/p0I-hwZSWMs?si=tQgi-LiHe6Oj_rzm)\\n\\nFunction calling is the ability to reliably connect LLMs to external tools to enable effective tool usage and interaction with external APIs.\\n\\nLLMs like GPT-4 and GPT-3.5 have been fine-tuned to detect when a function needs to be called and then output JSON containing arguments to call the function. The functions that are being called by function calling will act as tools in your AI application and you can define more than one in a single request.\\n\\nFunction calling is an important ability for building LLM-powered chatbots or agents that need to retrieve context for an LLM or interact with external tools by converting natural language into API calls.\\n\\nFunctional calling enables developers to create:\\n\\n- conversational agents that can efficiently use external tools to answer questions. For example, the query \\\"What is the weather like in Belize?\\\" will be converted to a function call such as `get_current_weather(location: string, unit: 'celsius' | 'fahrenheit')`\\n- LLM-powered solutions for extracting and tagging data (e.g., extracting people names from a Wikipedia article)\\n- applications that can help convert natural language to API calls or valid database queries\\n- conversational knowledge retrieval engines that interact with a knowledge base\\n\\n\\nIn this guide, we demonstrate how to prompt models like GPT-4 and open-source models to perform function calling for different use cases.\\n\\n## Function Calling with GPT-4[](#function-calling-with-gpt-4)\\n\\nAs a basic example, let's say we asked the model to check the weather in a given location.\\n\\nThe LLM alone would not be able to respond to this request because it has been trained on a dataset with a cutoff point. The way to solve this is to combine the LLM with an external tool. You can leverage the function calling capabilities of the model to determine an external function to call along with its arguments and then have it return a final response. Below is a simple example of how you can achieve this using the OpenAI APIs.\\n\\nLet's say a user is asking the following question to the model:\\n\\nTo handle this request using function calling, the first step is to define a weather function or set of functions that you will be passing as part of the OpenAI API request:\\n\\nThe `get_current_weather` function returns the current weather in a given location. When you pass this function definition as part of the request, it doesn't actually executes a function, it just returns a JSON object containing the arguments needed to call the function. Here are some code snippets of how to achieve this.\\n\\nYou can define a completion function as follows:\\n\\nThis is how you can compose the user question:\\n\\nFinally, you can call the `get_completion` above and passing both the `messages` and `tools`:\\n\\nThe `response` object contains the following:\\n\\nIn particular, the `arguments` object contains the important arguments extracted by the model and that will be needed to complete the request.\\n\\nYou can then choose to call an external weather API for the actual weather. Once you have the weather information available you can pass it back to the model to summarize a final response given the original user question.\\n\\n## Notebooks[](#notebooks)\\n\\nHere is a notebook with a simple example that demonstrates how to use function calling with the OpenAI APIs:\\n\\n## Function Calling with Open-Source LLMs[](#function-calling-with-open-source-llms)\\n\\nMore notes on function calling with open-source LLMs coming soon.\\n\\n## Function Calling Use Cases[](#function-calling-use-cases)\\n\\nBelow is a list of use cases that can benefit from the function calling capability of LLMs:\\n\\n- **Conversational Agents**: Function calling can be used to create complex conversational agents or chatbots that answer complex questions by calling external APIs or external knowledge base and providing more relevant and useful responses.\\n\\n- **Natural Language Understanding**: It can convert natural language into structured JSON data, extract structured data from text, and perform tasks like named entity recognition, sentiment analysis, and keyword extraction.\\n\\n- **Math Problem Solving**: Function calling can be used to define custom functions to solve complex mathematical problems that require multiple steps and different types of advanced calculations.\\n\\n- **API Integration**: It can be used to effectively integrate LLMs with external APIs to fetch data or perform actions based on the input. This could be helpful to build either a QA system or creative assistant. In general, function calling can convert natural language into valid API calls.\\n\\n- **Information Extraction**: Function calling be effectively used to extract specific information from a given input, such as retrieving relevant news stories or references from an article.\\n\\n## References[](#references)\\n\\n- [Fireworks Raises the Quality Bar with Function Calling Model and API Release (opens in a new tab)](https://blog.fireworks.ai/fireworks-raises-the-quality-bar-with-function-calling-model-and-api-release-e7f49d1e98e9)\\n- [Benchmarking Agent Tool Use and Function Calling (opens in a new tab)](https://blog.langchain.dev/benchmarking-agent-tool-use/)\\n- [Function Calling (opens in a new tab)](https://ai.google.dev/docs/function_calling)\\n- [Interacting with APIs (opens in a new tab)](https://python.langchain.com/docs/use_cases/apis)\\n- [OpenAI's Function Calling (opens in a new tab)](https://platform.openai.com/docs/guides/function-calling)\\n- [How to call functions with chat models (opens in a new tab)](https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models)\\n- [Pushing ChatGPT's Structured Data Support To Its Limits (opens in a new tab)](https://minimaxir.com/2023/12/chatgpt-structured-data/)\\n- [Math Problem Solving with Function Calling (opens in a new tab)](https://github.com/svpino/openai-function-calling/blob/main/sample.ipynb)\\n\\n\\nLast updated on Sat Dec 20 2025\\n\\nSponsored by[![SerpAPI](https://cdn.rawgit.com/standard/standard/master/docs/logos/serpapi.png)](https://serpapi.com/)\\n\\n[![SVG Image]()Fine-tuning GPT-4o](/applications/finetuning-gpt4o \\\"Fine-tuning GPT-4o\\\")[Context Caching with LLMs![SVG Image]()](/applications/context-caching \\\"Context Caching with LLMs\\\")\\n\\n========================\\nLarge Language Models (LLMs) have revolutionized code generation, but to get high-quality, useful output, creating effective prompts is crucial. The quality of the generated code is heavily dependent on the quality of the prompts provided. A poorly framed prompt can lead to incomplete, incorrect, or generic responses, whereas a well-structured prompt maximizes the model’s potential. In this article, we will explore advanced strategies for writing effective prompts to generate high-quality code with LLMs.\\n\\n## Provide Detailed Context\\n\\n[](#provide-detailed-context)\\n\\nWhen interacting with LLMs for code generation, the depth and quality of context provided directly correlates with the relevance and accuracy of the output.\\n\\nKey elements to include:\\n\\n- Specific problem domain\\n- Existing codebase characteristics\\n- Implementation constraints\\n- Performance requirements\\n- Architectural patterns already in use\\n\\n\\nAdditionally, you can use *@references* to point the model to specific files or functions, making your request more precise. Instead of describing a function in text, you can directly reference it.\\n\\n ❌ Poor: \\\"Create a user authentication system.\\\"\\n\\n ✅ Better: \\\"Create a JWT-based authentication system for a Node.js Express API that integrates with our MongoDB user collection. The system should handle password hashing with bcrypt, issue tokens valid for 24 hours, and implement refresh token rotation for security. Our existing middleware pattern uses async/await syntax. Refer to @authMiddleware.js for the middleware structure and @userModel.js for the user schema.\\\"\\n\\nBy using *@authMiddleware.js* and *@userModel.js*, you ensure the generated code aligns with your existing setup, reducing integration issues and manual adjustments.\\n\\n## Break Down Problems Into Steps\\n\\n[](#break-down-problems-into-steps)\\n\\nComplex coding tasks require systematic decomposition into manageable units. This approach begins with:\\n\\n- Start with clear functionality requirements\\n- Analyze directory structure and code organization\\n- Guide the LLM through logical implementation steps for the desired functionality while respecting established architectural boundaries and design patterns.\\n\\n\\nFor instance, when implementing a data processing pipeline, first clarify the input data structure, transformation logic, error handling requirements, and expected output format. Next, analyze the directory structure and determine where the new functionality should be implemented.\\n\\nConsider factors such as dependency relationships, module boundaries, and code organization principles. This step ensures that generated code will integrate seamlessly with the existing codebase.\\n\\n## Choose the Correct Model for the Job\\n\\n[](#choose-the-correct-model-for-the-job)\\n\\nDifferent LLMs exhibit varying strengths in code generation tasks. One model may excel at understanding complex requirements and generating code with strong logical consistency, while another model may offer advantages in certain programming languages or frameworks. When evaluating which LLM to use, key technical factors to consider:\\n\\n- Context window capacity (essential when working with extensive codebases)\\n- Language/framework proficiency\\n- Domain-specific knowledge\\n- Consistency across iterations\\n\\n\\nExample comparison:\\n\\n| Task | Model Selection Consideration |\\n| --- | --- |\\n| Complex enterprise architecture | Models with larger context windows excel at maintaining consistency across large codebases |\\n| ML pipeline implementation | Models with stronger mathematics and data science training perform better |\\n| Frontend component generation | Models with recent training on modern frameworks provide up-to-date patterns |\\n\\n\\n## Be Specific When Referring to Existing Patterns\\n\\n[](#be-specific-when-referring-to-existing-patterns)\\n\\nSpecificity in prompts significantly improves code quality by eliminating uncertainity. Technical specificity involves explicit references to existing implementation patterns. Rather than requesting generic implementations, point to specific reference points in the codebase. For example:\\n\\n ❌ Poor: \\\"Write a function to process user data.\\\"\\n\\n ✅ Better: \\\"Create a new method in the UserProcessor class (src/services/UserProcessor.js) that transforms user data following the same functional approach used in the transformPaymentData method. Prioritize readability over performance as this runs asynchronously.\\\"\\n\\nThis approach extends to naming conventions, coding standards, and architectural patterns. Specify whether the code should follow functional or object-oriented methodologies, indicate preferred design patterns, and clarify whether performance or readability should be prioritized.\\n\\n## Regenerate Rather Than Rollback\\n\\n[](#regenerate-rather-than-rollback)\\n\\nWhen encountering issues with generated code, complete regeneration of the problematic parts often gives us much better results compared to incremental fixes. This method originates from how LLMs interpret context and produce responses.\\n\\nWhy regeneration works better?\\n\\n- Provides fresh perspective without previous errors\\n- Avoids propagating flawed logic\\n- Allows incorporation of new constraints\\n\\n\\nThis technique is particularly effective for algorithmic challenges or complex logic implementations where small errors can propagate throughout the solution, making isolated fixes problematic.\\n\\nExample:\\n\\n \\\"Let's try a different approach for the sorting algorithm. The previous implementation had O(n²) complexity, which won't work for our dataset size. Please regenerate the solution focusing on an O(n log n) approach using a merge sort pattern similar to what we use in our other data processing functions.\\\"\\n\\n## Implement Reflection Through Multiple Approaches\\n\\n[](#implement-reflection-through-multiple-approaches)\\n\\nLeveraging LLMs' ability to generate multiple solution approaches enhances code quality through comparative analysis. Begin by requesting the model to generate two or three distinct implementation strategies, each with its own strengths and weaknesses.\\n\\nOnce multiple approaches are generated, prompt the LLM to analyze the trade-offs between them considering factors such as time complexity, space efficiency, readability, and maintainability. This reflection process enables the model to select and refine the most appropriate solution based on the specific requirements.\\n\\nExample:\\n\\n \\\"Generate three different approaches to implement a caching system for our API responses:\\n\\n 1. An in-memory LRU cache using a custom data structure\\n 2. A Redis-based distributed cache solution\\n 3. A file-system based approach with TTL\\n\\n For each approach, analyze time complexity, memory usage, scalability across multiple servers, and implementation complexity.\\\"\\n\\n## Implement Self-Review Mechanisms\\n\\n[](#implement-self-review-mechanisms)\\n\\nSelf-review prompting enhances code quality by guiding the LLM through a systematic evaluation of its output. Implement this by explicitly requesting the model to cross-check its generated code after completion. The review should assess aspects such as:\\n\\n- Correctness (logical errors)\\n- Efficiency (performance issues)\\n- Edge case handling\\n- Security vulnerabilities\\n- Adherence to requirements\\n\\n\\nDuring self-review, the model can identify potential issues such as race conditions in concurrent code, memory leaks in resource management, or vulnerability points in security-critical sections. Once issues are identified, the model can immediately refine the implementation to address these concerns. This approach mirrors established software engineering practices like code review and static analysis, but performs them within the same prompt-response cycle, significantly improving the initial code quality.\\n\\n## Give the Model a Persona or Frame of Reference\\n\\n[](#give-the-model-a-persona-or-frame-of-reference)\\n\\nAssigning a technical persona to the LLM establishes a consistent perspective for code generation. When prompted to adopt the mindset of a senior backend engineer with expertise in distributed systems, the model will prioritize scalability, fault tolerance, and performance considerations in its generated code. Similarly, a security-focused persona will emphasize input validation, proper authentication flows, and potential vulnerability mitigation.\\n\\nThe technical frame of reference should match the requirements of the task.\\n\\nEffective personas by task:\\n\\n- Backend systems: \\\"Senior backend engineer with distributed systems expertise\\\"\\n- Security features: \\\"Security architect with OWASP expertise\\\"\\n- Infrastructure: \\\"DevOps engineer focusing on cloud-native solutions\\\"\\n- Frontend: \\\"UX-focused frontend developer with accessibility expertise\\\"\\n\\n\\nThis technique leverages the model's ability to imitate domain expertise, resulting in code that better reflects established practices within specific technical domains.\\n\\nExample:\\n\\n \\\"Act as a senior security engineer conducting a code review. Create a user registration system in Python/Django that implements proper password handling, input validation, and protection against common web vulnerabilities.\\\"\\n\\n## Clarify Language, Framework, or Library Constraints\\n\\n[](#clarify-language-framework-or-library-constraints)\\n\\nExplicit specification of technical constraints ensures compatibility with the target environment. Begin by clearly stating the programming language version (e.g., Python 3.9, TypeScript 4.5) to ensure language features used in the generated code are available in the production environment. Similarly, specify framework versions and their specific conventions, such as \\\"FastAPI 0.95 with Pydantic v2 for data validation.\\\"\\n\\nAdditionally, provide information about library dependencies and their integration points. For instance, when requesting database interaction code, specify whether to use an ORM like SQLAlchemy or raw SQL queries, and clarify connection handling expectations. This level of specificity prevents the generation of code that relies on unavailable dependencies or incompatible versions.\\n\\nExample:\\n\\n \\\"Generate a REST API endpoint using:\\n\\n - Python 3.9\\n - FastAPI 0.95 with Pydantic v2 models\\n - SQLAlchemy 2.0 for database queries\\n - JWT authentication using our existing AuthManager from auth_utils.py\\n - Must be compatible with our PostgreSQL 13 database\\\"\\n\\n## Implement Chain of Thought Prompting\\n\\n[](#implement-chain-of-thought-prompting)\\n\\nChain of thought prompting enhances code generation by guiding the LLM through a logical progression of reasoning steps. This technique involves instructing the model to decompose complex problems into sequential reasoning stages before writing code.\\n\\nSequential reasoning stages to request:\\n\\n- Initial explanation of the conceptual approach\\n- Pseudocode outline of the solution\\n- Implementation details for each component\\n- Complete integrated implementation\\n\\n\\nChain of thought prompting is effective for algorithms with complex logic or data transformations. It reduces logical errors, improves coherence, and offers visibility into the model's reasoning, allowing for corrections before the final code is produced.\\n\\nUnlike the \\\"break down into steps\\\" approach, which focuses on task decomposition, chain of thought prompting emphasizes making the model's reasoning explicit, helping ensure the logic is sound before accepting the final solution.\\n\\n## Tailor Prompts to the Model's Unique Strengths\\n\\n[](#tailor-prompts-to-the-models-unique-strengths)\\n\\nDifferent LLMs exhibit varying capabilities that can be leveraged through specialized prompting strategies.\\n\\nAdaptation strategies:\\n\\n- For limited context windows: Focus on algorithmic guidance\\n- For strong functional programming models: Frame problems using functional patterns\\n- For models with framework expertise: Leverage specific framework terminology\\n\\n\\nUnderstanding a model's training biases also informs effective prompting. Some models may excel at particular programming paradigms or languages based on their training data distribution. For instance, a model with strong representation of functional programming concepts in its training data will respond better to prompts framed in functional terms for appropriate problems.\\n\\n## Specify Edge Cases and Constraints\\n\\n[](#specify-edge-cases-and-constraints)\\n\\nComprehensive edge case consideration significantly improves code robustness. Technical edge cases vary by domain but commonly include boundary values, resource limitations, and exceptional conditions. When requesting implementations, clearly list these factors, for instance, specifying how a data processing function should handle empty inputs, malformed data, or values exceeding expected ranges.\\n\\nBy considering these constraints upfront, the generated code can incorporate appropriate validation logic, error handling mechanisms, and performance optimizations tailored to the specified limitations.\\n\\nExample:\\n\\n \\\"Implement a file processing function that handles:\\n\\n - Empty files (return empty result)\\n - Files exceeding 1GB (process in chunks)\\n - Malformed CSV data (log error, continue processing valid rows)\\n - Concurrent access (implement appropriate locking)\\n - Network interruptions (implement resume capability)\\\"\\n\\nMastering prompt engineering for code generation is both an art and a science that dramatically improves development efficiency. By implementing these strategic approaches, developers can transform LLMs from basic code generators into sophisticated development partners, enabling the creation of more robust, efficient, and maintainable software solutions.\\n\\n========================\\nIn the rapidly evolving landscape of Large Language Models (LLMs), effective prompt engineering has become a crucial skill. While much attention is given to the art of crafting effective prompts, less focus has been placed on how to efficiently manage these prompts programmatically. Python, with its rich set of built-in features, offers powerful tools to dynamically construct, optimize, and manage LLM prompts. \\nThis article explores how Python's built-in features can transform your approach to LLM prompt engineering, making your code more efficient, maintainable, and powerful.\\n\\n## [#1-using-locals-for-dynamic-context-injection](#1-using-locals-for-dynamic-context-injection)**1. Using locals() for Dynamic Context Injection**\\n\\n**The Problem** \\nWhen working with LLMs, we often need to inject contextual information into our prompts. The traditional approach involves manual string formatting: \\n\\n\\n def generate_response(user_name, user_query, previous_context):\\n prompt = f\\\"\\\"\\\"\\n User name: {user_name}\\n User query: {user_query}\\n Previous context: {previous_context}\\n\\n Please respond to the user's query considering the context above.\\n \\\"\\\"\\\"\\n\\n return call_llm_api(prompt)\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\nThis works well for simple cases, but becomes unwieldy as the number of variables increases. It's also error-prone – you might forget to include a variable or update a variable name.\\n\\n**The Solution with locals()** \\nPython's locals() function returns a dictionary containing all local variables in the current scope. We can leverage this to automatically include all relevant context: \\n\\n\\n def generate_response(user_name, user_query, previous_context, user_preferences=None, user_history=None):\\n # All local variables are now accessible\\n context_dict = locals()\\n\\n # Build a dynamic prompt section with all available context\\n context_sections = []\\n for key, value in context_dict.items():\\n if value is not None: # Only include non-None values\\n context_sections.append(f\\\"{key}: {value}\\\")\\n\\n context_text = \\\"\\\\n\\\".join(context_sections)\\n\\n prompt = f\\\"\\\"\\\"\\n Context information:\\n {context_text}\\n\\n Please respond to the user's query considering the context above.\\n \\\"\\\"\\\"\\n\\n return call_llm_api(prompt)\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\n**Benefits:**\\n\\nAutomatic variable inclusion: If you add a new parameter to your function, it's automatically included in the context. \\nReduced errors: No need to manually update string formatting when variables change. \\nCleaner code: Separates the mechanism of context injection from the specific variables.\\n\\n## [#2-using-inspect-for-function-documentation](#2-using-inspect-for-function-documentation) 2. Using inspect for Function Documentation\\n\\n**The Problem** \\nWhen creating LLM prompts that involve function execution or code generation, providing accurate function documentation is crucial: \\n\\n\\n def create_function_prompt(func_name, params):\\n prompt = f\\\"\\\"\\\"\\n Create a Python function named '{func_name}' with the following parameters:\\n {params}\\n \\\"\\\"\\\"\\n return prompt\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\nThis approach requires manually specifying function details, which can be tedious and error-prone.\\n\\n**The Solution with inspect** \\nPython's inspect module allows us to extract rich metadata from functions: \\n\\n\\n import inspect\\n\\n def create_function_prompt(func_reference):\\n # Get the function signature\\n signature = inspect.signature(func_reference)\\n\\n # Get the function docstring\\n doc = inspect.getdoc(func_reference) or \\\"No documentation available\\\"\\n\\n # Get source code if available\\n try:\\n source = inspect.getsource(func_reference)\\n except:\\n source = \\\"Source code not available\\\"\\n\\n prompt = f\\\"\\\"\\\"\\n Function name: {func_reference.__name__}\\n\\n Signature: {signature}\\n\\n Documentation:\\n {doc}\\n\\n Original source code:\\n {source}\\n\\n Please create an improved version of this function.\\n \\\"\\\"\\\"\\n\\n return prompt\\n\\n # Example usage\\n def example_func(a, b=10):\\n \\\"\\\"\\\"This function adds two numbers together.\\\"\\\"\\\"\\n return a + b\\n\\n improved_function_prompt = create_function_prompt(example_func)\\n # Send to LLM for improvement\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\nThis dynamically extracts all relevant information about the function, making the prompt much more informative.\\n\\n## [#3-context-management-with-class-attributes](#3-context-management-with-class-attributes) 3. Context Management with Class Attributes\\n\\n**The Problem** \\nManaging conversation history and context with LLMs often leads to repetitive code: \\n\\n\\n conversation_history = []\\n\\n def chat_with_llm(user_input):\\n # Manually build the prompt with history\\n prompt = \\\"Previous conversation:\\\\n\\\"\\n for entry in conversation_history:\\n prompt += f\\\"{entry['role']}: {entry['content']}\\\\n\\\"\\n\\n prompt += f\\\"User: {user_input}\\\\n\\\"\\n prompt += \\\"Assistant: \\\"\\n\\n response = call_llm_api(prompt)\\n\\n # Update history\\n conversation_history.append({\\\"role\\\": \\\"User\\\", \\\"content\\\": user_input})\\n conversation_history.append({\\\"role\\\": \\\"Assistant\\\", \\\"content\\\": response})\\n\\n return response\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\n**The Solution with Class Attributes and dict** \\nWe can create a conversation manager class that uses Python's object attributes: \\n\\n\\n class ConversationManager:\\n def __init__(self, system_prompt=None, max_history=10):\\n self.history = []\\n self.system_prompt = system_prompt\\n self.max_history = max_history\\n self.user_info = {}\\n self.conversation_attributes = {\\n \\\"tone\\\": \\\"helpful\\\",\\n \\\"style\\\": \\\"concise\\\",\\n \\\"knowledge_level\\\": \\\"expert\\\"\\n }\\n\\n def add_user_info(self, **kwargs):\\n \\\"\\\"\\\"Add user-specific information to the conversation context.\\\"\\\"\\\"\\n self.user_info.update(kwargs)\\n\\n def set_attribute(self, key, value):\\n \\\"\\\"\\\"Set a conversation attribute.\\\"\\\"\\\"\\n self.conversation_attributes[key] = value\\n\\n def build_prompt(self, user_input):\\n \\\"\\\"\\\"Build a complete prompt using object attributes.\\\"\\\"\\\"\\n prompt_parts = []\\n\\n # Add system prompt if available\\n if self.system_prompt:\\n prompt_parts.append(f\\\"System: {self.system_prompt}\\\")\\n\\n # Add conversation attributes\\n prompt_parts.append(\\\"Conversation attributes:\\\")\\n for key, value in self.conversation_attributes.items():\\n prompt_parts.append(f\\\"- {key}: {value}\\\")\\n\\n # Add user info if available\\n if self.user_info:\\n prompt_parts.append(\\\"\\\\nUser information:\\\")\\n for key, value in self.user_info.items():\\n prompt_parts.append(f\\\"- {key}: {value}\\\")\\n\\n # Add conversation history\\n if self.history:\\n prompt_parts.append(\\\"\\\\nConversation history:\\\")\\n for entry in self.history[-self.max_history:]:\\n prompt_parts.append(f\\\"{entry['role']}: {entry['content']}\\\")\\n\\n # Add current user input\\n prompt_parts.append(f\\\"\\\\nUser: {user_input}\\\")\\n prompt_parts.append(\\\"Assistant:\\\")\\n\\n return \\\"\\\\n\\\".join(prompt_parts)\\n\\n def chat(self, user_input):\\n \\\"\\\"\\\"Process a user message and get response from LLM.\\\"\\\"\\\"\\n prompt = self.build_prompt(user_input)\\n\\n response = call_llm_api(prompt)\\n\\n # Update history\\n self.history.append({\\\"role\\\": \\\"User\\\", \\\"content\\\": user_input})\\n self.history.append({\\\"role\\\": \\\"Assistant\\\", \\\"content\\\": response})\\n\\n return response\\n\\n def get_state_as_dict(self):\\n \\\"\\\"\\\"Return a dictionary of the conversation state using __dict__.\\\"\\\"\\\"\\n return self.__dict__\\n\\n def save_state(self, filename):\\n \\\"\\\"\\\"Save the conversation state to a file.\\\"\\\"\\\"\\n import json\\n with open(filename, 'w') as f:\\n json.dump(self.get_state_as_dict(), f)\\n\\n def load_state(self, filename):\\n \\\"\\\"\\\"Load the conversation state from a file.\\\"\\\"\\\"\\n import json\\n with open(filename, 'r') as f:\\n state = json.load(f)\\n self.__dict__.update(state)```\\n\\n\\n\\n Using this approach:\\n\\n # Create a conversation manager\\n convo = ConversationManager(system_prompt=\\\"You are a helpful assistant.\\\")\\n\\n # Add user information\\n convo.add_user_info(name=\\\"John\\\", expertise=\\\"beginner\\\", interests=[\\\"Python\\\", \\\"AI\\\"])\\n\\n # Set conversation attributes\\n convo.set_attribute(\\\"tone\\\", \\\"friendly\\\")\\n\\n # Chat with the LLM\\n response = convo.chat(\\\"Can you help me understand how Python dictionaries work?\\\")\\n print(response)\\n\\n # Later, save the conversation state\\n convo.save_state(\\\"conversation_backup.json\\\")\\n\\n # And load it back\\n new_convo = ConversationManager()\\n new_convo.load_state(\\\"conversation_backup.json\\\")\\n\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\n## [#4-using-dir-for-object-exploration](#4-using-dir-for-object-exploration)**4. Using dir() for Object Exploration**\\n\\n**The Problem** \\nWhen working with complex objects or APIs, it can be challenging to know what data is available to include in prompts:\\n\\n def generate_data_analysis_prompt(dataset):\\n # Manually specifying what we think is available\\n prompt = f\\\"\\\"\\\"\\n Dataset name: {dataset.name}\\n Number of rows: {len(dataset)}\\n\\n Please analyze this dataset.\\n \\\"\\\"\\\"\\n return prompt\\n\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\n**The Solution with dir()** \\nPython's dir() function lets us dynamically discover object attributes and methods:\\n\\n def generate_data_analysis_prompt(dataset):\\n # Discover available attributes\\n attributes = dir(dataset)\\n\\n # Filter out private attributes (those starting with _)\\n public_attrs = [attr for attr in attributes if not attr.startswith('_')]\\n\\n # Build metadata section\\n metadata = []\\n for attr in public_attrs:\\n try:\\n value = getattr(dataset, attr)\\n # Only include non-method attributes with simple values\\n if not callable(value) and not hasattr(value, '__dict__'):\\n metadata.append(f\\\"{attr}: {value}\\\")\\n except:\\n pass # Skip attributes that can't be accessed\\n\\n metadata_text = \\\"\\\\n\\\".join(metadata)\\n\\n prompt = f\\\"\\\"\\\"\\n Dataset metadata:\\n {metadata_text}\\n\\n Please analyze this dataset based on the metadata above.\\n \\\"\\\"\\\"\\n\\n return prompt\\n\\n\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\nThis approach automatically discovers and includes relevant metadata without requiring us to know the exact structure of the dataset object in advance.\\n\\n## [#5-string-manipulation-for-prompt-cleaning](#5-string-manipulation-for-prompt-cleaning)**5. String Manipulation for Prompt Cleaning**\\n\\n**The Problem** \\nUser inputs and other text data often contain formatting issues that can affect LLM performance:\\n\\n def process_document(document_text):\\n prompt = f\\\"\\\"\\\"\\n Document:\\n {document_text}\\n\\n Please summarize the key points from this document.\\n \\\"\\\"\\\"\\n return call_llm_api(prompt)\\n\\n\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\n**The Solution with String Methods** \\nPython's rich set of string manipulation methods can clean and normalize text:\\n\\n def process_document(document_text):\\n # Remove excessive whitespace\\n cleaned_text = ' '.join(document_text.split())\\n\\n # Normalize line breaks\\n cleaned_text = cleaned_text.replace('\\\\r\\\\n', '\\\\n').replace('\\\\r', '\\\\n')\\n\\n # Limit length (many LLMs have token limits)\\n max_chars = 5000\\n if len(cleaned_text) > max_chars:\\n cleaned_text = cleaned_text[:max_chars] + \\\"... [truncated]\\\"\\n\\n # Replace problematic characters\\n for char, replacement in [('\\\\u2018', \\\"'\\\"), ('\\\\u2019', \\\"'\\\"), ('\\\\u201c', '\\\"'), ('\\\\u201d', '\\\"')]:\\n cleaned_text = cleaned_text.replace(char, replacement)\\n\\n prompt = f\\\"\\\"\\\"\\n Document:\\n {cleaned_text}\\n\\n Please summarize the key points from this document.\\n \\\"\\\"\\\"\\n\\n return call_llm_api(prompt)\\n\\n\\n\\n![Enter fullscreen mode]()![Exit fullscreen mode]()\\n\\n## [#conclusion](#conclusion)**Conclusion**\\n\\nPython's built-in features offer powerful capabilities for enhancing LLM prompts:\\n\\n**Dynamic Context:** Using locals() and **dict** to automatically include relevant variables \\n**Introspection:** Using inspect and dir() to extract rich metadata from objects and functions \\n**String Manipulation:** Using Python's string methods to clean and normalize text\\n\\nBy leveraging these built-in features, you can create more robust, maintainable, and dynamic LLM interactions. The techniques in this article can help you move beyond static prompt templates to create truly adaptive and context-aware LLM applications. \\nMost importantly, these approaches scale well as your LLM applications become more complex, allowing you to maintain clean, readable code while supporting sophisticated prompt engineering techniques. \\nWhether you're building a simple chatbot or a complex AI assistant, Python's built-in features can help you create more effective LLM interactions with less code and fewer errors.\\n\\n========================\\nWhen ChatGPT launched in late 2022, it sparked an immediate rush to figure out how to get the most out of these powerful new [language models](/resources/articles/large-language-models). The practice was quickly dubbed ”prompt engineering,” and with it came breathless predictions that “prompt engineer” would become the hottest job title of the decade. LinkedIn filled up with self-proclaimed experts, and rumors of six-figure salaries circulated widely. Fast forward to now, and that initial hype cycle has [cooled considerably](https://fortune.com/2025/05/07/prompt-engineering-200k-six-figure-role-now-obsolete-thanks-to-ai/).\\n\\nBut the underlying skill—crafting inputs that produce genuinely useful outputs—has only grown more important as AI tools have embedded themselves into real workflows. Whether you’re generating a quarterly analysis with [Claude Sonnet 4.5](https://www.anthropic.com/news/claude-sonnet-4-5), producing product photos with Google’s [Nano Banana](/resources/articles/nano-banana) Pro, or debugging a complex function with [OpenAI’s Codex CLI](https://developers.openai.com/codex/cli/), the difference between a vague instruction and a well-crafted prompt may mean the difference between getting generic outputs and exactly what you need. Read on for prompt engineering best practices—from using few-shot examples to guide output format to breaking complex tasks into smaller steps—that will help you get useful results on the first try instead of the fifth.\\n\\n**Key takeaways**:\\n\\n- Prompt engineering is the process of writing and structuring inputs so large language models produce clearer, more accurate, and more useful responses for a given task.\\n\\n- Key techniques include zero-shot prompting, few-shot prompting, chain-of-thought prompting, tree-of-thought prompting, role or persona prompting, prompt chaining, meta-prompting, and reflection prompting to guide reasoning and output quality.\\n\\n- Prompt engineering best practices focus on being specific, providing clear context, examples, and data, defining the desired output, and giving instructions on what to do rather than what to avoid.\\n\\n- Strong results from LLMs come from iterating on prompts, testing variations, understanding model limitations, and refining inputs based on the quality and consistency of responses.\\n\\n## [What is prompt engineering?](#what-is-prompt-engineering)\\n\\nPrompt engineering is the process of carefully designing the inputs you give to large language models to get desired outputs. It includes word choice, how you order instructions, what context you provide, and whether you include examples. At a more advanced level, it means knowing when to use techniques like few-shot prompting—providing input-output pairs so the model learns your desired format—or chain-of-thought prompting, where you ask the model to reason step-by-step before delivering a final answer. And despite the name, prompt engineering isn’t just for engineers: it’s for anyone using [AI assistant tools](/resources/articles/claude-ai-alternative-assistants) like Claude, Gemini, or ChatGPT who wants better results, whether you’re writing code or drafting social posts.\\n\\nWatch how to use prompting in Cursor with [DigitalOcean’s MCP server](https://github.com/digitalocean-labs/mcp-digitalocean) to build, deploy, update, and manage an [App Platform](/products/app-platform) React app using natural language instead of juggling terminals, dashboards, and scripts. Follow along as a simple React app goes from setup to live deployment and iteration entirely from within your editor.\\n\\n[https://www.youtube.com/embed/kpJjydsqvBA](https://www.youtube.com/embed/kpJjydsqvBA)\\n\\n## [Why is prompt engineering important?](#why-is-prompt-engineering-important)\\n\\nAs models like Llama 4, [Grok 3](/resources/articles/grok-vs-chatgpt), and [DeepSeek-R1](/community/tutorials/deepseek-r1-large-language-model-capabilities) grow more capable, the quality of your input increasingly determines the quality of your output. In other words, how you phrase a request shapes what you get back. A poorly structured prompt may require multiple back-and-forth exchanges to clarify intent, whereas a thoughtfully engineered prompt yields usable results on the first (or second) attempt.\\n\\n- **It reduces hallucinations and improves factual accuracy**: Explicitly giving the model permission to say “I don’t know” when it lacks sufficient information makes it less likely to [hallucinate](/resources/articles/ai-hallucination) and fabricate answers. Providing relevant context and source material gives the model a factual foundation to work from, rather than relying solely on training data.\\n\\n- **It unlocks better performance for code generation**: When using tools like OpenAI’s [Codex CLI](https://chatgpt.com/features/codex/) or asking Claude to debug a script, prompts that include leading words like “import” or “SELECT” nudge the model toward the correct output pattern. Specifying the programming language, expected inputs and outputs, and edge cases to handle produces code that’s closer to production-ready.\\n\\n- **It makes complex reasoning more transparent and reliable**: Chain-of-thought prompting asks the model to reason step-by-step before delivering a final answer, which is especially valuable for multi-step math problems or analytical tasks. This technique surfaces the model’s logic so you spot errors mid-process rather than discovering them in the final output.\\n\\n- **It gives you control over output format and structure**: Few-shot prompting with input-output examples teaches the model the exact style, tone, or schema you’re looking for, whether that’s a [JSON](/community/tutorials/an-introduction-to-json) object, a bulleted summary, or a formal report.\\n\\n- **It improves results for image generation and multimodal tasks**: When prompting image models like [DALL-E 3](/resources/articles/dall-e-alternatives) or [Midjourney](/resources/articles/midjourney-alternatives), specificity around lighting, composition, and style dramatically affects output quality. The same principle applies to [vision-language models](/community/tutorials/visualizing-vision-language-models-multimodal-reasoning): asking Claude to analyze a chart step-by-step in structured tags yields more accurate data extraction than a generic “describe this image” request.\\n\\n## [Types of prompting engineering techniques](#types-of-prompting-engineering-techniques)\\n\\nDifferent tasks call for different approaches. A simple question might need nothing more than a clear instruction, while a multi-step analysis might require examples, [structured reasoning](/community/tutorials/understanding-reasoning-in-llms), or breaking the work into stages. The techniques below range from basic to advanced, and they can (and should) be combined depending on what you’re trying to accomplish.\\n\\n| Technique | What it does | Best for | Example use case |\\n| --- | --- | --- | --- |\\n| Zero-shot | Direct instruction with no examples | Simple, well-defined tasks | “Translate this sentence to French” |\\n| Few-shot | Provides input-output examples before the request | Format-sensitive or style-specific outputs | Teaching the model your preferred summary format |\\n| Chain-of-thought | Asks the model to reason step-by-step | Math, logic, and multi-step analysis | Walking through a word problem before answering |\\n| Tree-of-thought | Explores multiple reasoning paths before selecting the best one | Complex problems requiring strategic planning | Solving puzzles or multi-step decisions with backtracking |\\n| Role or persona | Assigns the model an identity or expertise | Domain-specific tasks requiring a particular tone | “You are a senior Python developer reviewing this code” |\\n| Prompt chaining | Breaks complex tasks into sequential prompts | Multi-stage workflows with dependencies | Summarize → critique → revise |\\n| Meta-prompting | Uses an LLM to generate or improve prompts | Optimizing prompts at scale or for reusable templates | Having a model refine your summarization prompt |\\n| Reflection prompting | Asks the model to critique and refine its own output | Catching errors and improving accuracy | “Review your answer for mistakes and correct them” |\\n\\n### [Zero-shot prompting](#zero-shot-prompting)\\n\\nZero-shot prompting is the most straightforward approach: you give the model a direct instruction without any examples. This works well when the task is clear, and the expected output format is obvious, like asking “What’s the capital of Japan?” or “Summarize this paragraph in two sentences.” The model relies entirely on its training to interpret what you want. Zero-shot is fast and requires minimal setup, but it falls short when you need a specific structure, tone, or style that isn’t implicit in the instruction itself.\\n\\n### [Few-shot prompting](#few-shot-prompting)\\n\\nFew-shot prompting provides one or more input-output examples before your actual request, helping the model learn the pattern you’re looking for. For instance, if you want summaries in a specific format, you might show two examples of articles paired with your preferred summary style, then ask the model to summarize a third article the same way. This technique is especially useful when the desired output involves subtle formatting, tone, or conventions that are hard to describe in words alone, or when your data involves multiple different categorizations or classifications that benefit from clarification. Start with one example (one-shot), then add more if the output still isn’t matching your expectations.\\n\\nMaster [few-shot prompting](/community/tutorials/_few-shot-prompting-techniques-examples-best-practices) by providing input-output examples that teach LLMs to recognize patterns without fine-tuning. Learn when to use zero-shot vs. few-shot approaches, plus implementation techniques with the OpenAI API and LangChain.\\n\\n### [Chain-of-thought (CoT) prompting](#chain-of-thought-cot-prompting)\\n\\nChain-of-thought prompting asks the model to show its reasoning before delivering a final answer. The simplest version is adding “Think step-by-step” to your prompt, which encourages the model to work through the problem rather than jumping straight to a conclusion. For example, instead of asking “What’s 15% of 340?” you might prompt: “Calculate 15% of 340. Show your work step-by-step, then give the final answer.”\\n\\nThis technique shines when it comes to math problems, logic puzzles, and analytical tasks where intermediate steps matter. It also makes errors easier to spot—if the model’s reasoning goes wrong at step two, you get to catch it before it compounds into a bad final answer. Some models, like [Claude’s extended thinking mode](https://www.anthropic.com/news/visible-extended-thinking), automate this process, but manual CoT remains valuable when you need transparent, reviewable logic.\\n\\n### [Tree-of-thought (ToT) prompting](#tree-of-thought-tot-prompting)\\n\\nTree-of-thought prompting extends chain-of-thought by exploring multiple reasoning paths simultaneously rather than following a single chain. Instead of committing to one line of reasoning, the model generates several possible approaches, evaluates each one, and backtracks if a path isn’t working—similar to how humans solve complex problems through trial and error.\\n\\nThis technique is especially useful for tasks that require strategic planning, like game-playing or multi-step decisions, where an early wrong choice may derail the entire solution. ToT is more resource-intensive than simpler methods and is typically overkill for straightforward tasks, but it outperforms other techniques on problems that benefit from deliberate exploration and self-evaluation. A [study](https://arxiv.org/pdf/2305.10601) conducted by researchers at Princeton and Google DeepMind found that tree-of-thought prompting achieved a 74% success rate on the Game of 24 benchmark—a math puzzle using four numbers to reach 24. This result was much higher than chain-of-thought (49%) and standard input-output prompting (33%)—but at a higher cost per case ($0.74 vs. $0.47 and $0.13).\\n\\n### [Role or persona prompting](#role-or-persona-prompting)\\n\\nRole prompting assigns the model a specific identity, expertise, or perspective to shape the scope of how it responds. A prompt like “You are an experienced immigration attorney” will produce a different answer than “You are a travel blogger” when asked about visa requirements—the attorney focuses on legal accuracy, while the blogger might emphasize practical tips. This technique is useful when you need domain-specific vocabulary, a particular communication style, or responses calibrated to a certain audience.\\n\\nThat said, modern models are sophisticated enough that heavy-handed role assignments (“You are a world-renowned expert who never makes mistakes”) may actually backfire by limiting helpfulness. A lighter touch often works better: instead of assigning a role, try being explicit about the perspective you want, like “Analyze this investment portfolio with a focus on risk tolerance and long-term growth.”\\n\\n### [Prompt chaining](#prompt-chaining)\\n\\nPrompt chaining breaks a complex task into smaller sequential steps, where the output of one prompt becomes the input for the next. Rather than asking a model to “research this topic, outline an article, write the draft, and edit it for tone” in a single prompt, you’d handle each stage separately: first generate research notes, then feed those into an outline prompt, then expand the outline into a draft, and finally run an editing pass. This approach trades speed for accuracy—each individual prompt is easier for the model to execute well, and you get to review or adjust the output at each stage before moving on.\\n\\nPrompt chaining is particularly effective for workflows that involve analysis followed by synthesis, like summarizing a document, grading the summary for completeness, then improving it based on that feedback. It’s typically implemented programmatically via API or AI workflow tools, but you can also do it manually in a chat interface by copying outputs between prompts. The manual approach gives you human-in-the-loop control over complex workflows, letting you apply your own judgment and make adjustments between steps.\\n\\n### [Meta-prompting](#meta-prompting)\\n\\nMeta-prompting uses an LLM to generate, refine, or improve prompts rather than writing them yourself from scratch. Instead of crafting every detail manually, you provide a high-level description of what you want and let the model propose an improved version—or you use a more capable model to optimize prompts for a less capable one.\\n\\nThis technique is useful when you need reusable prompt templates, want to systematically improve prompts across a category of tasks, or are building applications where prompt quality directly impacts results at scale. Tools like [ChatPRD](https://www.chatprd.ai/) are built around this idea—they take your high-level product concepts and turn them into structured prompts that AI can use to generate apps or features. Meta-prompting lets you work at the level of what you want rather than getting bogged down in the exact phrasing needed to make AI understand it.\\n\\n### [Reflection prompting](#reflection-prompting)\\n\\nReflection prompting asks the model to critique, evaluate, or refine its own output before delivering a final answer. After generating an initial response, you prompt the model to review its work—checking for errors, inconsistencies, or areas for improvement—and then produce a revised version based on that self-assessment.\\n\\nUse this approach for tasks where accuracy matters and a second pass catches mistakes that slipped through the first attempt. For example, after generating code, you might prompt: “Review the code above for bugs or edge cases, then provide a corrected version.” Reflection adds [latency](/resources/articles/network-latency) since it requires additional processing, but it often improves output quality on complex reasoning, writing, and problem-solving tasks.\\n\\n## [10 prompt engineering best practices](#10-prompt-engineering-best-practices)\\n\\nWhether you’re generating code, drafting marketing copy, analyzing data, or building automated workflows, the following practices apply across use cases and prompting techniques. Think of them as universal habits that make any prompt more effective—regardless of the specific model you’re using or what you’re trying to accomplish.\\n\\nChallenge yourself with a fun image prompt guessing game that reveals how specific wording guides generative results, sharpening your understanding of effective prompting. See how small changes in phrasing influence outputs and build intuition for prompt design with hands-on feedback.\\n\\n[https://www.youtube.com/embed/Kfzr6wR8LNs](https://www.youtube.com/embed/Kfzr6wR8LNs)\\n\\n### [1. Be as specific as possible](#1-be-as-specific-as-possible)\\n\\nSpecificity is key to obtaining the most accurate and relevant information from an AI when writing prompts. A specific prompt minimizes ambiguity, allowing the AI to understand the request’s context and nuance, preventing it from providing overly broad or unrelated responses. To achieve this, include as many relevant details as possible without overloading the AI with superfluous information so context stays focused. This balance ensures that the AI has just enough guidance to produce the specific outcome you’re aiming for.\\n\\nWhen creating the best prompts for an AI, ask for the following specifics:\\n\\n- **Detailed context**: Provide the AI with enough background information to understand the scenario you’re inquiring about. This includes the subject matter, scope, and any relevant constraints.\\n\\n- **Desired format**: Clearly specify the format in which you want the information to be presented, whether it’s a list, a detailed report, bullet points, or a summary. Mention any structural preferences, such as headings, subheadings, or paragraph limits.\\n\\n- **Output length**: Detail how long you want the AI’s response, whether that’s “3 paragraphs” or “250 words.”\\n\\n- **Level of detail**: Indicate the level of detail required for the response, from high-level overviews to in-depth analysis, to ensure the model’s output matches your informational needs. For example, “Give me a five-sentence summary of this report” will produce a very different output than “Walk me through each section of this report and explain the key findings in detail.”\\n\\n- **Tone and style**: A prompt asking for “a professional summary for stakeholders” will produce something very different from “a casual explainer for a blog audience.” If you have a specific lens in mind—formal, conversational, persuasive, technical—say so upfront rather than editing after the fact.\\n\\n- **Examples and comparisons**: Asking the model to include analogies or concrete examples often makes the output more useful and easier to apply. A prompt like “Explain how neural networks learn and give me a real-world comparison” produces a more memorable response than one that stays purely abstract—like “Explain how neural networks learn,” which might give you a technically accurate but harder-to-grasp explanation\\n\\n**💡 Detailed report outline prompt example**:\\n\\nPlease provide an outline for a comprehensive report that analyzes the current trends in social media marketing for technology companies, focusing on the developments from 2025 onward. Include space for both established platforms like Instagram, Facebook, and X, as well as emerging options like Bluesky and Threads.\\n\\nThe outline should include an introduction, three main sections addressing different aspects of social media trends, and a conclusion summarizing the findings. Please suggest the types of graphs that could illustrate user engagement trends and list bullet points that summarize key marketing strategies in each section.\\n\\n### [2. Supply the AI with examples](#2-supply-the-ai-with-examples)\\n\\nIncorporating examples into your prompts steers the AI’s responses in the desired direction. By providing examples as you write prompts, you set a precedent for the type of information or response you expect. This practice is particularly useful for complex tasks where the desired output might be ambiguous or for creative tasks with more than one relevant answer.\\n\\nWhen you supply the AI with examples, ensure they represent the quality and style of your desired result. This strategy clarifies your expectations and helps the AI model its responses after the examples provided, leading to more accurate and tailored outputs.\\n\\nHere are some example types you could provide to an AI to help guide it toward generating the best response possible:\\n\\n- **Sample texts**: Share excerpts reflecting the style, tone, and content you want the AI to replicate. For instance, if you’re writing a company newsletter, paste a previous edition and prompt: “Write next month’s newsletter update in this same voice and format.”\\n\\n- **Data formats**: To guide the AI’s output, provide specific data structures, such as table layouts or spreadsheet formats. You might include a sample row like “| Product Name | SKU | Price | Stock |” and ask the model to populate the rest based on a product list you provide.\\n\\n- **Document templates**: Offer templates to ensure the AI’s response follows a desired structure and format. For a weekly status report, you could share your existing template with headers like “Accomplishments,” “Blockers,” and “Next Steps,” then ask the AI to fill it in based on your notes.\\n\\n- **Code snippets**: Provide code examples if you need help with programming tasks to ensure correct syntax and logic. If you want a [Python](/community/tutorials/python-tutorial) function that follows your team’s conventions, paste an existing function and prompt: “Write a new function that validates email addresses using the same structure and commenting style.”\\n\\n- **Graphs and charts examples**: If you’re asking the AI to create similar graphics, share samples of visual data representation. You might include a screenshot of a bar chart from a previous quarterly report with the prompt: “Generate the data and labels for a chart like this showing Q3 performance.”\\n\\n- **Marketing copy**: If you’re crafting marketing content, share ad copy that aligns with your brand’s voice for the AI to mimic. For example, paste a landing page headline and subhead from a previous product launch and prompt: “Write three variations of launch copy for our new API feature using this same tone and structure.”\\n\\n**💡 Comparison table prompt example**:\\n\\nCreate a comparison table for two project management tools, Tool A and Tool B.\\n\\nInclude the following categories: Price, Key Features, User Reviews, and Support Options. For instance, under Key Features, list things like ‘Task Assignment’, ‘Time Tracking’, and ‘File Sharing’.\\n\\nThe format should mirror something like this:\\n\\n| Feature | Tool A | Tool B |\\n| --- | --- | --- |\\n| Price | $X per user/month | $Y per user/month |\\n| Key Features | Task Assignment, Time Tracking, File Sharing | Task Assignment, Time Tracking, File Sharing |\\n| User Reviews | X stars | Y stars |\\n| Support Options | 24/7 Live Chat, Email | Business Hours Phone, Email |\\n\\nPlease ensure the table is concise and suitable for inclusion in a business report.\\n\\n### [3. Get better answers by providing data](#3-get-better-answers-by-providing-data)\\n\\nMost LLMs now allow you to attach documents, images, spreadsheets, and other files directly to your prompts—and many also connect to tools like Google Drive, Notion, or GitHub to pull in context automatically. This makes it easy to incorporate specific data into your prompts… However, just because you can connect these tools doesn’t mean you should give unfettered access to all your business and personal files. Be selective: only share what’s necessary for the task at hand, and check whether your data might be used for model training—most platforms let you [opt out of this in settings](https://help.openai.com/en/articles/7730893-data-controls-faq).\\n\\nWith [AI privacy](/resources/articles/ai-and-privacy) in mind, incorporating data into prompts improves the quality of AI-generated responses, providing a solid foundation for the AI to understand the context and craft precise answers. Provide data that includes numerical values, dates, or categories, organized in a clear and structured way. Also, remember to give context to the data and, when possible, to cite its source. When you give the AI concrete data with clear context, it does more than summarize—it analyzes trends, spots patterns, and surfaces insights you might have missed. This is especially valuable for tasks like financial analysis, market research, or performance reporting, where the output needs to be grounded in real numbers rather than general observations.\\n\\nIf you need to work with a specific set of documents rather than the entire web, tools like [NotebookLM](https://notebooklm.google/) let you upload your own files and get citation-backed responses grounded only in that material. For a broader look at research-focused AI tools that handle documents and data differently than general chatbots, check out our guide to [Perplexity alternatives](/resources/articles/perplexity-alternatives).\\n\\n**💡 Quarterly sales analysis prompt example**:\\n\\nPlease analyze the sales data from the first quarter of 2025 provided in the attached PDF document. I need a summary that identifies our best-selling product, the overall sales trend, and any notable patterns in customer purchases.\\n\\nThe PDF contains detailed monthly sales units for three products: Product A, Product B, and Product C. After reviewing the data, summarize your findings in a concise paragraph that is suitable for a weekly business review. Highlight significant increases or decreases in sales and offer insights into potential factors driving these trends.\\n\\n### [4. Specify your desired output](#4-specify-your-desired-output)\\n\\nWhether you’re chatting with [Gemini or ChatGPT](/resources/articles/gemini-vs-chatgpt), articulate the precise format and structure you expect in the response. Specify whether you require a detailed report, a summary, bullet points, or a narrative form to ensure the AI tailors its output to your needs. For instance, a prompt like “Give me a bulleted list of key takeaways” will produce something very different from “Write this up as a two-paragraph executive summary.”\\n\\nIndicate any preferences such as tone, style, and the inclusion of certain elements like headings or subheadings. If you need a specific structure—like a blog post with an H1, three H2 sections, and a conclusion—say so explicitly rather than hoping the model infers it. By clearly defining your desired output, you guide the AI to deliver information that aligns with your intended use.\\n\\n**💡 Historical timeline prompt example**:\\n\\nCreate a comprehensive overview of the key milestones in the history of software development. The output should be structured as a timeline with bullet points, each bullet including the year, the milestone event, and a brief description of its significance. Start from the 1980s. The tone should be educational. Please limit the overview to twenty major milestones to maintain conciseness.\\n\\n### [5. Provide instructions on what to do instead of what *not* to do](#5-provide-instructions-on-what-to-do-instead-of-what-_not_-to-do)\\n\\nWhile LLMs continue to evolve and improve, they still stumble on negated prompts—instructions telling them what not to do, like “avoid bullet points.” A [study](https://arxiv.org/abs/2209.12711) conducted by researchers at KAIST found that larger models actually perform worse on these types of instructions, struggling more than smaller models to process negative framing. When constructing prompts for AI, it’s more effective to direct the system toward the desired action rather than detailing what it should avoid. This positive instruction approach reduces ambiguity and focuses the AI’s processing power on generating constructive outcomes.\\n\\nNegative instructions often require the AI to interpret and invert them, increasing the cognitive load and potential for misunderstanding. By clearly stating the intended actions, you enable the AI to apply its capabilities directly to fulfilling the task at hand, improving the efficiency and accuracy of the response.\\n\\n**💡 Positive instruction framing prompt examples**:\\n\\n- Avoid: \\\"Don’t write too much detail. → Use Instead: “Please provide a concise summary.”\\n\\n- Avoid: “Don’t use technical jargon.” → Use Instead: “Write with clear and simple language accessible to a general audience.”\\n\\n- Avoid: “Don’t give examples from before the year 2000.” → Use Instead: “Provide examples from the year 2000 onwards.”\\n\\n### [6. Give the model a persona or frame of reference](#6-give-the-model-a-persona-or-frame-of-reference)\\n\\nPersona prompting is a technique in its own right, but it also complements other strategies, such as few-shot prompting and chain-of-thought reasoning. By assigning the model a specific identity or expertise, you shape the tone, vocabulary, and depth of the response to match your needs.\\n\\nThis works across use cases:\\n\\n- **For generating images**: “You are a fine art photographer with a focus on dramatic lighting.”\\n\\n- **For coding**: “You are a senior backend engineer reviewing this Python script for security vulnerabilities.”\\n\\n- **For writing**: “You are a tech journalist explaining this concept to a non-technical audience.”\\n\\nThe persona sets expectations and helps the model calibrate its output to the task at hand.\\n\\n**💡 Product manager persona prompt example**:\\n\\nYou are a senior technical product manager preparing for customer discovery interviews. Based on the attached product requirements document, draft a list of 10 open-ended questions to ask prospective users about their current workflow pain points and feature priorities.\\n\\nFocus on questions that will surface unmet needs without leading the respondent toward a specific answer. Keep the tone conversational but professional.\\n\\n### [7. Try a prompt optimizer tool](#7-try-a-prompt-optimizer-tool)\\n\\nIf you’re running the same prompt repeatedly—for data extraction, content generation, or any structured task—consider using a prompt optimizer to automatically improve it. These tools test variations of your prompt, evaluate the results against your criteria, and iteratively refine the instructions until performance improves. The process is similar to what you’d do manually, but faster and more systematic: the optimizer proposes changes, measures their effectiveness, and retains the best-performing versions.\\n\\nHere are some prompt optimization tools to explore:\\n\\n- [DSPy (with GEPA optimizer)](https://github.com/stanfordnlp/dspy): Open-source framework that treats prompts as learnable parameters and uses reflection-based optimization.\\n\\n- [OpenAI Prompt Optimizer](https://platform.openai.com/docs/guides/prompt-optimizer): Built-in tool for refining prompts using your own datasets.\\n\\n- [Opik Agent Optimizer](https://www.comet.com/docs/opik/agent_optimization/algorithms/metaprompt_optimizer): Supports multiple optimization algorithms, including evolutionary and Bayesian approaches.\\n\\n- [LangWatch](https://langwatch.ai/prompt-optimizer): LLM ops platform with prompt optimization studio.\\n\\n- [PromptPerfect](https://promptperfect.jina.ai/): Automatic optimization for text and image model prompts.\\n\\nEven if you’re not ready to run an optimizer yourself, the takeaway still applies: treat prompt engineering as an iterative process, and don’t be afraid to test multiple variations to find what works best.\\n\\nExplore how [DSPy replaces manual prompt tuning](/community/tutorials/prompting-with-dspy) with declarative programs, adaptive modules, and built-in optimizers that improve language model behavior through compilation. See practical examples of question answering and retrieval-augmented pipelines that show measurable gains in accuracy with minimal training data.\\n\\n### [8. Split complex tasks into simpler ones](#8-split-complex-tasks-into-simpler-ones)\\n\\nWhen dealing with complex tasks, breaking them into simpler, more manageable components (or prompt chaining) makes them more approachable for an LLM. This applies to everything from writing a multi-section report to building a financial model, debugging a codebase, developing a content calendar, or drafting a legal contract. Using step-by-step instructions helps prevent the AI from becoming overwhelmed and ensures that each part of the task is handled with attention to detail.\\n\\nAdditionally, this approach allows for easier monitoring and adjustment of each step, facilitating better quality control throughout the process. By compartmentalizing tasks, the AI also uses its resources more efficiently, allocating the necessary attention where it’s most needed.\\n\\n**💡 Task breakdown prompt example**:\\n\\nAvoid a single broad prompt:\\n\\n- “Write a 1500-word article on the impact of AI on remote work.”\\n\\n\\nTry an initial prompt and follow-up prompts instead:\\n\\n1. “Develop a detailed outline for a 1500-word article titled ‘Revolutionizing Remote Work: The Role of AI for Tech Professionals.’ The outline should include an engaging introduction, three main sections titled ‘Enhancing Productivity with AI Tools,’ ‘AI-Driven Communication Optimization,’ and ‘Advanced Project Management through AI,’ plus a conclusion that offers a perspective on future developments.”\\n\\n2. “Compose a detailed introduction for the article ‘Revolutionizing Remote Work: The Role of AI for Tech Professionals.’ The introduction should be 150-200 words, setting the stage for how AI is changing the game for remote workers in the tech industry, and providing a hook that will entice tech professionals to continue reading.”\\n\\n### [9. Understand the model’s shortcomings](#9-understand-the-model-s-shortcomings)\\n\\nIn crafting prompts for an AI, recognize the model’s limitations to set realistic expectations. Prompting AI to perform tasks it’s not designed for—or expecting certainty where it only offers probabilities—will lead to frustration and potentially misleading outputs.\\n\\nHere are some known shortcomings of AI models:\\n\\n- **Hallucinations are inherent, not a bug**: LLMs predict the next most likely word based on patterns—they don’t verify truth. When uncertain, they often generate plausible-sounding but fabricated information.\\n\\n- **Context window limits affect long inputs**: Models only process a fixed number of tokens at once. In long documents or conversations, earlier context may get lost, leading to contradictions or forgotten details—a phenomenon sometimes called the “[lost in the middle](https://arxiv.org/abs/2307.03172)” effect.\\n\\n- **Training data has a cutoff**: Models don’t have access to events or information after their training cutoff date unless they search the web or access external tools.\\n\\n- **Confidence doesn’t equal accuracy**: A model often sounds equally confident about correct facts and complete fabrications. High confidence in a response is not a reliable indicator of correctness, so it remains important to validate facts yourself.\\n\\n- **Complex reasoning compounds errors**: In multi-step tasks, small errors early on may snowball into larger inaccuracies by the end—especially in longer outputs or chain-of-thought reasoning.\\n\\n- **Nuance and ambiguity are challenging**: Sarcasm, cultural references, and domain-specific jargon often trip up even advanced models, leading to misinterpretations or overly literal responses.\\n\\n- **Training data reflects societal biases**: Models trained on internet text absorb [biases](/resources/articles/ai-bias) around race, gender, political ideology, geographic regions, and economic status. The result can be outputs that favor certain perspectives or make unwarranted assumptions about demographics and context.\\n\\n### [10. Take an experimental approach to prompting](#10-take-an-experimental-approach-to-prompting)\\n\\nPrompt engineering has evolved from a niche skill to a more established discipline, characterized by dedicated research, published best practices, and a growing library of techniques. But it’s still a field where experimentation pays off. The best practices in this guide—and others online—are a starting point, not a ceiling. These tools have only been commercially available for a couple of years, which means there are no established experts—just early practitioners learning alongside everyone else. You’ll often discover techniques specific to your use case, your model, or your workflow that aren’t documented anywhere.\\n\\nTest your prompts iteratively. Small changes in wording, structure, or instruction order alter the output, and you’ll only learn how a model responds by trying things out. What works for code generation in Claude might not work the same way in [GPT-5](/resources/articles/gpt-5-overview) and a prompt that produces great results for image generation in Midjourney might need a completely different approach in DALL-E. Keep notes on what works and what doesn’t across different models and tasks—whether you’re writing marketing copy, debugging Python scripts, or generating product visuals—and don’t be afraid to deviate from established patterns if you find something that performs better.\\n\\n## [Defensive prompt engineering](#defensive-prompt-engineering)\\n\\nIf you’re [building AI products](/resources/articles/building-ai-products) that accept user input and pass it to an LLM, you need to think about prompt injection—a type of attack where malicious inputs manipulate the model into ignoring its original instructions or revealing sensitive information. This isn’t just a concern for developers building [AI chatbots](/resources/articles/ai-agent-vs-ai-chatbot); it applies to any workflow where untrusted text gets processed by a model, from document summarizers to [AI-powered search tools](/resources/articles/ai-search-engines-for-research).\\n\\nThe following tactics help reduce your exposure, though no single defense is foolproof—layering multiple approaches provides the strongest protection.\\n\\n- **Use delimiters to separate instructions from user input**: Wrapping user-provided content in clear markers like ###, “”\\\", or XML tags helps the model distinguish between your system instructions and the data it should process. Pair this with instructions telling the model to treat the delimited content as inert data, not commands to follow.\\n\\n- **Sanitize and validate inputs before they reach the model**: Scan incoming text for suspicious patterns like “ignore previous instructions,” “forget your rules,” or attempts to inject your own delimiter characters. Filtering known attack phrases and rejecting abnormally long or strangely formatted inputs adds friction for anyone trying to manipulate the system.\\n\\n- **Apply the principle of least privilege**: Restrict your LLM application’s access to only the data sources and actions it genuinely needs, and use the lowest permissions possible. This doesn’t prevent prompt injection, but it limits the damage if an attack succeeds.\\n\\n- **Audit outputs before acting on them**: If your LLM generates commands, code, or API calls, validate those outputs through rule-based checks or a secondary classifier before executing them. Treating the model’s output as untrusted—just like user input—prevents a successful injection from cascading into your systems.\\n\\n- **Test your prompts adversarially**: Run red-team exercises where you actively try to break your own system using known injection techniques, jailbreak patterns, and edge cases. Regularly testing with inputs like “Ignore all above instructions and…” helps you identify vulnerabilities before attackers do.\\n\\n## [Prompt engineering best practices FAQs](#prompt-engineering-best-practices-faqs)\\n\\n**What is the difference between few-shot and zero-shot prompting?**\\n\\nZero-shot prompting is when you give the AI a direct instruction without any examples, relying on the model to understand and complete the task from the instruction alone. Few-shot prompting involves providing one or more input-output examples before your actual request, which helps the model learn the format, tone, or style you’re looking for. Few-shot prompting is particularly useful when you need consistent formatting or when the task involves subtle patterns that are easier to show than describe.\\n\\n**Do I need to know how to code to use prompt engineering?**\\n\\nNo—the core techniques are about communicating clearly with the model, not technical expertise. In fact, creatives often excel at prompt engineering because they’re practiced at describing concepts, setting tone, and iterating on feedback. That said, coding knowledge helps when you’re working with structured outputs like JSON, using the API directly, or building prompts into automated workflows.\\n\\n**Can prompt engineering help reduce AI hallucinations?**\\n\\nYes, several techniques help. Providing relevant data or context gives the model a factual foundation to work from, and explicitly giving the model permission to say “I don’t know” reduces the likelihood of fabricated answers. Chain-of-thought prompting also helps because it forces the model to show its work, making errors easier to spot.\\n\\n**What are common mistakes to avoid in prompt engineering?**\\n\\nBeing too vague is one of the most common issues—asking for “a summary” without specifying length, format, or focus leaves too much room for interpretation. Other frequent mistakes include overloading a single prompt with multiple unrelated tasks, using negative instructions instead of positive framing, and skipping iteration rather than testing and refining based on results.\\n\\n**What are the risks of prompt injection, and how do I prevent them?**\\n\\nPrompt injection occurs when malicious or accidental input manipulates an LLM into ignoring its original instructions or revealing sensitive information. To reduce risk, use delimiters to separate system instructions from user input, sanitize inputs for suspicious patterns, and validate the model’s outputs before acting on them.\\n\\n## [Build with DigitalOcean’s Gradient Platform](#build-with-digitalocean-s-gradient-platform)\\n\\n[DigitalOcean Gradient Platform](/products/gradientai/platform) makes it easier to build and deploy AI agents without managing complex infrastructure. Build custom, fully-managed agents backed by the world’s most powerful LLMs from Anthropic, DeepSeek, Meta, Mistral, and OpenAI. From customer-facing chatbots to complex, multi-agent workflows, integrate agentic AI with your application in hours with transparent, usage-based billing and no infrastructure management required.\\n\\n[https://www.youtube.com/embed/4zvvPar7Ybs](https://www.youtube.com/embed/4zvvPar7Ybs)\\n\\n**Key features**:\\n\\n- Serverless inference with leading LLMs and simple API integration\\n\\n- RAG workflows with knowledge bases for fine-tuned retrieval\\n\\n- Function calling capabilities for real-time information access\\n\\n- Multi-agent crews and agent routing for complex tasks\\n\\n- Guardrails for content moderation and sensitive data detection\\n\\n- Embeddable chatbot snippets for easy website integration\\n\\n- Versioning and rollback capabilities for safe experimentation\\n\\n[Get started with DigitalOcean Gradient Platform](/company/contact/sales?referrer=TheWave) for access to everything you need to build, run, and manage the next big thing.\\n\\n========================\\n\"" +2025-12-30T23:04:14.805176Z INFO ThreadId(10) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:40: [Tool Calling]-> PromptReviewer with args: PromptReviewerArgs { intent: Intent { text: "write a python fonction" }, spec: Specification { goal: "Create a working piece of Python code structured as a function.", constraints: "- The programming language must be Python.\n- The function's purpose, name, parameters, and return value are completely undefined.\n- High probability of delivering a function that does not meet the user's actual, unstated needs.\n- The solution will likely be too generic or trivial to be useful without further clarification." } } +2025-12-30T23:04:27.396940Z INFO ThreadId(13) rigscribe::tools::web_searcher: src/tools/web_searcher.rs:27: [Tool Calling]-> WebSearcher with args: Webquery { query: "best practices for python code generation prompts" } +2025-12-30T23:04:30.607099Z INFO ThreadId(10) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="WebSearcher" args="{\"query\":\"best practices for python code generation prompts\"}" result="\"![]()\\n\\nYou've been blocked by network security.\\n\\nIf you think you've been blocked by mistake, file a ticket below and we'll look into it.\\n\\n[File a ticket](https://support.reddithelp.com/hc/en-us/requests/new?ticket_form_id=21879292693140)\\n\\n========================\\n[![Image description](https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fouro0wg11ydxmhunehso.png)](https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fouro0wg11ydxmhunehso.png)\\n\\n**[Prompt Engineering for Python Code Generation](https://keploy.io/blog/community/prompt-engineering-for-python-code-generation-with-keploy)**\\n\\nPrompt engineering is an essential technique for optimizing AI-generated Python code, ensuring accurate, efficient, and context-aware outputs. As AI-powered tools like OpenAI’s GPT, GitHub Copilot, and Keploy gain popularity in software development, crafting well-structured prompts becomes crucial for generating high-quality Python code. This guide explores the fundamentals of prompt engineering and best practices for AI-driven Python code generation.\\n\\n**[What is Prompt Engineering](https://keploy.io/blog/community/prompt-engineering-for-python-code-generation-with-keploy)?**\\n\\nPrompt engineering involves designing and structuring inputs to guide AI models in producing high-quality responses. In the context of Python code generation, well-crafted prompts ensure the AI understands the intent and delivers precise results. By refining how queries are structured, developers can enhance the quality of AI-generated code, making it more readable, maintainable, and efficient.\\n\\n**Why is Prompt Engineering Important for Python Code Generation?**\\n\\nAI models rely on prompts to generate relevant and accurate code. A poorly structured prompt can lead to ambiguous or incorrect outputs, requiring additional effort to debug and refine. A well-structured prompt reduces ambiguity, improves efficiency, and minimizes the need for extensive post-processing. Prompt engineering is especially crucial for automated test generation tools like Keploy, which leverage AI to create reliable test cases for applications.\\n\\n**Key Strategies for Effective Prompt Engineering**\\n\\nTo generate better Python code with AI, consider the following strategies:\\n\\n**1. Be Specific and Contextual**\\n\\nProviding clear instructions and specifying the expected output format helps AI models generate more accurate results. Instead of asking, \\\"Write a Python function,\\\" a better prompt would be, \\\"Write a Python function that takes a list of numbers and returns a sorted list using the quicksort algorithm.\\\"\\n\\n**2. Use Examples to Guide the AI**\\n\\nIncluding input-output examples in your prompt helps AI understand patterns and expectations for code generation. For instance:\\n\\nPrompt: Write a Python function to check if a string is a palindrome.\\n\\nExample Input: 'radar'\\n\\nExample Output: True\\n\\nBy providing examples, AI can better align with the expected behavior.\\n\\n**3. Define Constraints and Requirements**\\n\\nExplicitly stating programming constraints, such as using specific libraries or avoiding certain functions, refines the generated output. If you want to generate a machine learning model using TensorFlow instead of PyTorch, include that in the prompt.\\n\\n**4. Break Down Complex Tasks into Steps**\\n\\nSplitting a large problem into smaller tasks improves AI performance and ensures structured code generation. Instead of asking AI to \\\"build a chatbot,\\\" break it down into \\\"write a Python function to handle user input,\\\" \\\"generate responses using an NLP model,\\\" and \\\"deploy the chatbot using Flask.\\\"\\n\\n**Common Challenges in AI-Generated Python Code**\\n\\nDespite advancements, AI-generated Python code may have challenges such as logical errors, lack of optimization, or missing edge cases. Effective prompt engineering can mitigate these issues by providing clear instructions and expected behavior. Keploy, an AI-powered test case generation tool, can help validate and refine AI-generated code by automating the testing process and identifying potential issues.\\n\\n**Practical Examples of Prompt Engineering for Python Code**\\n\\n**Example 1: Generating a Simple Function**\\n\\nPrompt:\\n\\nWrite a Python function that takes a list of numbers and returns the maximum value.\\n\\nGenerated Code:\\n\\ndef find_max(numbers):\\n\\nreturn max(numbers)\\n\\n**Example 2: Enforcing Coding Standards**\\n\\nPrompt:\\n\\nWrite a Python function that calculates the factorial of a number, following PEP 8 style guide.\\n\\nGenerated Code:\\n\\ndef factorial(n):\\n\\n\\\"\\\"\\\"Returns the factorial of a given number.\\\"\\\"\\\"\\n\\nif n == 0:\\n\\nreturn 1\\n\\nreturn n * factorial(n - 1)\\n\\n**Example 3: Using External Libraries**\\n\\nPrompt:\\n\\nWrite a Python function that uses NumPy to generate a random array of size 10.\\n\\nGenerated Code:\\n\\nimport numpy as np\\n\\ndef random_array():\\n\\nreturn np.random.rand(10)\\n\\n**Tools for Python Code Generation with AI**\\n\\nSeveral AI-powered tools assist in generating Python code efficiently. Some of the notable tools include:\\n\\n- **OpenAI’s GPT** – Generates Python code based on structured prompts.\\n- **GitHub Copilot** – Suggests code snippets within the IDE.\\n- **Keploy** – Automates test case generation and ensures AI-generated code is reliable and functional.\\n\\n\\n**Best Practices for Refining AI-Generated Code**\\n\\n1. **Review and Validate Output** – Always test AI-generated code for correctness and efficiency.\\n2. **Optimize for Readability** – Ensure the generated code follows best practices for maintainability.\\n3. **Use AI for Assistance, Not Replacement** – AI can enhance productivity but should complement human expertise.\\n4. **Leverage Keploy for Test Generation** – AI-generated code should be tested to ensure robustness, and Keploy can automate test creation for better validation.\\n\\n\\n**Conclusion**\\n\\nPrompt engineering is a powerful skill that enhances AI-driven Python code generation. By crafting precise, structured prompts, developers can improve AI accuracy, efficiency, and reliability in coding tasks. Whether generating new functions, enforcing coding standards, or integrating external libraries, well-designed prompts ensure optimal AI-generated output. Additionally, tools like Keploy help validate and test AI-generated code, ensuring its reliability in real-world applications.\\n\\nMastering prompt engineering not only improves the quality of AI-generated Python code but also enhances overall development efficiency, making AI an invaluable assistant in modern software engineering.\\n\\n========================\\n## Table of contents[/guides/better-prompts-ai-code#table-of-contents](/guides/better-prompts-ai-code#table-of-contents)\\n\\n- [Be specific with language and requirements](#be-specific-with-language-and-requirements)\\n- [Include examples and constraints](#include-examples-and-constraints)\\n- [Break down complex tasks and iterate](#break-down-complex-tasks-and-iterate)\\n- [Avoid common prompting pitfalls](#avoid-common-prompting-pitfalls)\\n- [Graphite Agent: AI code review for quality and consistency](#graphites-Graphite Agent-ai-code-review-for-quality-and-consistency)\\n\\n\\n[Large language models (LLMs)](https://developers.google.com/machine-learning/resources/intro-llms) can generate code in many programming languages, but the quality of their output depends heavily on how you prompt them. Crafting a precise prompt can be the difference between hours of debugging and a seamless implementation. The key is to clearly communicate your intent and requirements, because an AI doesn't inherently know your goals – it only follows the patterns described in your prompt. This guide introduces language-agnostic prompt engineering techniques to help both beginners and experienced developers get more accurate and useful code from AI. We'll illustrate each technique with examples of weak vs. improved prompts to show how small changes can yield better results. For a deeper dive into how AI code review works, see our [guide on AI code review](https://graphite.com/guides/ai-code-review).\\n\\n### Be specific with language and requirements[/guides/better-prompts-ai-code#be-specific-with-language-and-requirements](/guides/better-prompts-ai-code#be-specific-with-language-and-requirements)\\n\\nThe first rule of prompt engineering for code is to be specific about what you want. Vague prompts yield vague (often incorrect) results. Always mention the programming language, the task or algorithm, and any requirements or constraints. For example, consider asking an AI to generate a sorting algorithm:\\n\\n- **Weak prompt:** \\\"Write a sorting algorithm.\\\"\\n- **Improved prompt:** \\\"Write a **[Python](https://www.python.org/)** implementation of **merge sort** optimized for **memory efficiency**, including **time complexity analysis** and **error handling** for edge cases (e.g. empty arrays).\\\"\\n\\n\\nThe improved prompt is much clearer. It specifies:\\n\\n- The programming language (Python) and specific algorithm (merge sort).\\n- The optimization goal (memory efficiency) and required output details (time complexity analysis).\\n- Error handling expectations and an edge case to consider (empty arrays).\\n\\n\\nBy spelling out these details, you greatly reduce ambiguity. [Microsoft's Developer Tools](https://developer.microsoft.com/) research group observed that prompts with explicit specifications reduced the need for back-and-forth refinements by 68%. In practice, this means the AI is more likely to produce correct, ready-to-use code on the first try.\\n\\nBeing specific also means providing technical context when relevant. If certain constraints matter – such as the target environment, API version, performance or security requirements – include them in the prompt. For instance, specify \\\"Node.js 18 server environment\\\" or \\\"Python 3.10+\\\" if it matters. Otherwise, the model might generate code that is technically correct but not suitable for your situation.\\n> **Tip:** AI code review tools like [Graphite Agent](https://graphite.com/features/agent) can help enforce technical requirements and catch missing context.\\n\\n### Include examples and constraints[/guides/better-prompts-ai-code#include-examples-and-constraints](/guides/better-prompts-ai-code#include-examples-and-constraints)\\n\\nAnother powerful technique is to provide examples or test cases in your prompt. Examples act as implicit constraints, guiding the model toward the format or behavior you expect. For instance, if you want a function with a specific signature or output format, show a snippet of that.\\n\\nImagine you need an email validation function. A weak prompt might just say, \\\"Validate an email address.\\\" An improved prompt would describe requirements and give a usage example, e.g.:\\n> **Prompt:** \\\"Create a TypeScript function `validateEmail(email: string): {isValid: boolean; message: string}` that checks if an email is RFC-5322 compliant and rejects disposable domains. For example, if the input is `\\\"test@tempmail.com\\\"`, it should return an object like `{ isValid: false, message: 'Disposable domain not allowed' }`.\\\"\\n\\nIn this improved version, the prompt lists specific rules (RFC compliance and no disposable domains) and provides a clear function signature and an example of expected output. This gives the AI a template to follow and boundaries to respect. As Dr. Rachel Thomas of fast.ai notes, *\\\"examples in prompts serve as implicit constraints that guide the model toward the desired output format.\\\"* By including a small example or an outline of the output, you help the model understand exactly what you want.\\n\\n### Break down complex tasks and iterate[/guides/better-prompts-ai-code#break-down-complex-tasks-and-iterate](/guides/better-prompts-ai-code#break-down-complex-tasks-and-iterate)\\n\\nIf your desired output is complex, avoid requesting everything in one huge prompt. It's often better to break down a complex task into smaller steps or use an iterative approach. Large models can handle reasonably detailed prompts, but asking for a full application in one go is likely to produce a convoluted or partial answer. Instead, tackle complex tasks in stages and possibly across multiple prompts:\\n\\n1. **Decompose the problem:** Outline sub-tasks in separate prompts (e.g. first define the data model, then the API endpoints, then the UI components). This ensures each part is handled with focus.\\n2. **Use step-by-step reasoning:** You can actually instruct the model to \\\"think step by step.\\\" For example: \\\"I need a JavaScript function to find the longest increasing subsequence in an array. Let's solve this step by step: first explain the concept, then choose an algorithm (greedy vs DP), then write the code, then analyze complexity.\\\" By embedding this reasoning process in the prompt, you encourage the AI to work through the problem methodically.\\n3. **Iterate with the AI:** Treat the AI's first output as a draft. Review it, then issue follow-up prompts to refine the code.\\n\\n\\nBy breaking prompts into smaller pieces or sequential turns, you make it easier for the model to comply at each step. You also get opportunities to catch mistakes early and steer the output in the right direction. Remember that you can carry context between prompts (in a chat-based LLM) – use that to your advantage for iterative improvement.\\n\\nFor more information on [integrating AI into your code review workflow](https://graphite.com/guides/integrating-ai-code-review-workflow) and [how to review code written by AI](https://graphite.com/guides/how-to-review-code-written-by-ai), these guides offer practical advice for iterative, AI-assisted development.\\n\\n### Avoid common prompting pitfalls[/guides/better-prompts-ai-code#avoid-common-prompting-pitfalls](/guides/better-prompts-ai-code#avoid-common-prompting-pitfalls)\\n\\nWhile applying the above techniques, watch out for a few common mistakes that can undermine your results:\\n\\n- **Under-specifying the task:** Don't assume the AI \\\"knows\\\" what you intended. If a prompt is too minimal, the model might solve the wrong problem or omit important features. Always double-check that your prompt fully describes the problem.\\n- **Ignoring context or constraints:** Failing to mention the operating environment, framework, or performance/security constraints can result in code that doesn't fit your project needs. Also make sure to include any version requirements or security considerations. Without this, you might get code that technically works but is impractical or unsafe for your use case. The more context you give, the more relevant the output.\\n- **Over-reliance on AI without verification:** Even with good prompts, AI-generated code isn't guaranteed to be perfect. The model might produce subtle logic bugs, use deprecated APIs, or just take an unconventional approach. Never merge AI-written code into production without reviewing it. Treat the AI as a coding assistant, not an infallible coder. Write prompts that encourage best practices (like asking for safe, idiomatic code), and always test and review the output thoroughly. Using linters or static analysis on AI code is also a smart safeguard.\\n> **To compare approaches:** This guide, [Automated vs. manual code reviews: Finding the right balance](https://graphite.com/guides/automated-vs-manual-code-reviews), explores the strengths and limitations of both, and how to combine them for best results.\\n\\nBy avoiding these pitfalls, you ensure that prompt engineering remains a helpful aid rather than a source of technical debt. Prompting is an iterative learning process – over time, you'll learn which clarifications or extra details yield better code from your preferred LLM.\\n\\n### Graphite Agent: AI code review for quality and consistency[/guides/better-prompts-ai-code#graphite-agent-ai-code-review-for-quality-and-consistency](/guides/better-prompts-ai-code#graphite-agent-ai-code-review-for-quality-and-consistency)\\n\\nWriting a good prompt is the first step, but ensuring the quality and consistency of AI-generated code is the next challenge. This is where tools like [Graphite Agent](https://graphite.com/features/agent) come in. Graphite Agent is an AI-powered code review assistant that provides immediate, context-aware feedback on code changes – including those written by an AI. It hooks into your development workflow (e.g. reviewing [pull requests](https://docs.github.com/en/pull-requests)) and flags issues ranging from logic bugs and security vulnerabilities to style violations and missing documentation. Graphite Agent uses your repository's context and even custom rules you define to give relevant suggestions. For example, you can configure Graphite Agent with your project's style guide or common AI mistakes to watch for, and it will catch those patterns in AI-generated code before a human reviewer even sees them.\\n\\n![screenshot of Graphite Agent comment](/images/content/guides/better-prompts-ai-code/sample-Graphite Agent-comment.png)\\n\\nBy incorporating a tool like Graphite Agent in your workflow, you add an extra layer of scrutiny for AI-produced code. It helps ensure the code meets your team's quality standards and is consistent with the rest of your codebase. In practice, Graphite Agent can automatically identify edge cases the AI missed, point out performance issues, and even suggest fixes – all of which saves human reviewers time and instills confidence in using AI-generated code. Remember, prompt engineering and AI coding tools are most effective when paired with robust review practices. [Graphite Agent](https://graphite.com/features/agent) bridges that gap by reviewing AI code for you, so you can safely harness AI code generation while maintaining high code quality and consistency.\\n\\n========================\\nYou’ve used [ChatGPT](https://realpython.com/chatgpt-coding-mentor-python/), and you understand the potential of using a **large language model (LLM)** to assist you in your tasks. Maybe you’re already working on an LLM-supported application and have read about **prompt engineering**, but you’re unsure how to translate the theoretical concepts into a practical example.\\n\\nYour text prompt instructs the LLM’s responses, so tweaking it can get you vastly different output. In this tutorial, you’ll apply multiple **prompt engineering techniques** to a real-world example. You’ll experience prompt engineering as an iterative process, see the effects of applying various techniques, and learn about related concepts from machine learning and data engineering.\\n\\n**In this tutorial, you’ll learn how to:**\\n\\n- Work with OpenAI’s **GPT-3.5** and **GPT-4** models through their **API**\\n- Apply prompt engineering techniques to a **practical, real-world example**\\n- Use **numbered steps**, **delimiters**, and **few-shot prompting** to improve your results\\n- Understand and use **chain-of-thought prompting** to add more context\\n- Tap into the power of **roles** in messages to go beyond using singular **role prompts**\\n\\n\\nYou’ll work with a **Python script** that you can repurpose to fit your own LLM-assisted task. So if you’d like to use practical examples to discover how you can use prompt engineering to get better results from an LLM, then you’ve found the right tutorial!\\n\\n== **Take the Quiz:**== Test your knowledge with our interactive “Practical Prompt Engineering” quiz. You’ll receive a score upon completion to help you track your learning progress:\\n\\n---\\n\\n\\n[Prompt Engineering: A Practical Example](/quizzes/practical-prompt-engineering/)\\n\\n**Interactive Quiz**\\n\\n[Practical Prompt Engineering](/quizzes/practical-prompt-engineering/)\\n\\nIn this quiz, you'll test your understanding of prompt engineering techniques with large language models (LLMs) like GPT-3.5 and GPT-4. You'll revisit how to work with OpenAI's API, apply prompt engineering techniques to practical examples, and use various strategies to improve your results.\\n\\n## Understand the Purpose of Prompt Engineering[](#understand-the-purpose-of-prompt-engineering \\\"Permanent link\\\")\\n\\nPrompt engineering is more than a buzzword. You can get vastly different output from an LLM when using different prompts. That may seem obvious when you consider that you get different output when you ask different questions—but it also applies to phrasing the same conceptual question differently. Prompt engineering means constructing your text input to the LLM using specific approaches.\\n\\nYou can think of prompts as arguments and the LLM as the function to which you pass these arguments. Different input means different output:\\n\\nWhile an LLM is much more complex than the toy function above, the fundamental idea holds true. For a successful function call, you’ll need to know exactly which argument will produce the desired output. In the case of an LLM, that argument is text that consists of many different **tokens**, or [pieces of words](https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them).\\n\\nThe field of prompt engineering is still changing rapidly, and there’s a lot of active research happening in this area. As LLMs continue to evolve, so will the prompting approaches that will help you achieve the best results.\\n\\nIn this tutorial, you’ll cover some prompt engineering techniques, along with approaches to iteratively developing prompts, that you can use to get better text completions for your own LLM-assisted projects:\\n\\n- [Zero-Shot Prompting](#describe-your-task)\\n- [Few-Shot Prompting](#use-few-shot-prompting-to-improve-output)\\n- [Delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt)\\n- [Numbered Steps](#describe-your-request-in-numbered-steps)\\n- [Role Prompts](#add-a-role-prompt-to-set-the-tone)\\n- [Chain-of-Thought (CoT) Prompting](#walk-the-model-through-chain-of-thought-prompting)\\n- [Structured Output](#structure-your-output-format-as-json)\\n- [Labeled Conversations](#improve-your-output-with-the-power-of-conversation)\\n\\n\\nThere are more techniques to uncover, and you’ll also find links to additional resources in the tutorial. Applying the mentioned techniques in a practical example will give you a great starting point for improving your LLM-supported programs. If you’ve never worked with an LLM before, then you may want to peruse [OpenAI’s GPT documentation](https://platform.openai.com/docs/guides/gpt) before diving in, but you should be able to follow along either way.\\n\\n## Get to Know the Practical Prompt Engineering Project[](#get-to-know-the-practical-prompt-engineering-project \\\"Permanent link\\\")\\n\\nYou’ll explore various prompt engineering techniques in service of a practical example: [sanitizing](https://en.wikipedia.org/wiki/Data_sanitization) customer chat conversations. By practicing different prompt engineering techniques on a single real-world project, you’ll get a good idea of why you might want to use one technique over another and how you can apply them in practice.\\n\\nImagine that you’re the resident Python developer at a company that handles thousands of customer support chats on a daily basis. Your job is to format and sanitize these conversations. You also help with deciding which of them require additional attention.\\n\\n### Collect Your Tasks[](#collect-your-tasks \\\"Permanent link\\\")\\n\\nYour big-picture assignment is to help your company stay on top of handling customer chat conversations. The conversations that you work with may look like the one shown below:\\n\\nYou’re supposed to make these text conversations more accessible for further processing by the customer support department in a few different ways:\\n> - Remove personally identifiable information.\\n> - Remove swear words.\\n> - Clean the date-time information to only show the date.\\n\\nThe swear words that you’ll encounter in this tutorial won’t be spicy at all, but you can consider them stand-ins for more explicit phrasing that you might find out in the wild. After sanitizing the chat conversation, you’d expect it to look like this:\\n\\nSure—you could handle it [using Python’s `str.replace()`](https://realpython.com/replace-string-python/) or [show off](https://xkcd.com/208) your [regular expression](https://realpython.com/regex-python/) skills. But there’s more to the task than immediately meets the eye.\\n\\nYour project manager isn’t a technical person, and they stuck another task at the end of this list. They may think of the task as a normal continuation of the previous tasks. But you know that it requires an entirely different approach and technology stack:\\n> Mark the conversations as “positive” or “negative.”\\n\\nThat task lies in the realm of [machine learning](https://realpython.com/learning-paths/machine-learning-python/), namely [text classification](https://realpython.com/python-keras-text-classification/), and more specifically [sentiment analysis](https://realpython.com/python-nltk-sentiment-analysis/). Even [advanced regex skills](https://realpython.com/regex-python-part-2/) won’t get you far in this challenge.\\n\\nAdditionally, you know that the customer support team that you’re preparing the data for will want to continue working on it programmatically. Plain text isn’t necessarily the best format for doing that. You want to do work that’s useful for others, so you add yet another stretch goal to your growing list of tasks:\\n> Format the output as JSON.\\n\\nThis task list is quickly growing out of proportion! Fortunately, you’ve got access to the [OpenAI API](https://platform.openai.com/docs/api-reference/), and you’ll employ the help of their LLM to solve all of these challenges.\\n\\nOne of the impressive features of LLMs is the breadth of tasks that you can use them for. So you’ll cover a lot of ground and different areas of use. And you’ll learn how to tackle them all with prompt engineering techniques.\\n\\n### Prepare Your Tools[](#prepare-your-tools \\\"Permanent link\\\")\\n\\nTo follow along with this tutorial, you’ll need to know how to [run a Python script](https://realpython.com/run-python-scripts/) from your command-line interface (CLI), and you’ll need an [API key from OpenAI](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key).\\n\\nYou’ll focus on prompt engineering, so you’ll only use the CLI app as a tool to demonstrate the different techniques. However, if you want to understand the code that you’ll be using, then it’ll help to have some experience with [Python classes](https://realpython.com/python-classes/), [defining your own Python functions](https://realpython.com/defining-your-own-python-function/), [the name-main idiom](https://realpython.com/if-name-main-python/), and [using Python to interact with web APIs](https://realpython.com/api-integration-in-python/).\\n\\nTo get started, go ahead and download the example Python script that you’ll work with throughout the tutorial:\\n\\nThe codebase represents a light abstraction layer on top of the OpenAI API and exposes one function called `get_chat_completion()` that’ll be of primary interest for the tutorial. The function interacts with OpenAI’s [`/chat/completions` endpoint](https://platform.openai.com/docs/api-reference/chat) to generate responses using different models, such as **GPT-3.5-Turbo** and **GPT-4**. You’ll explore both models, starting with GPT-3.5-Turbo, and eventually you’ll move on to the more powerful GPT-4 model.\\n\\nMost of the code in `app.py` revolves around setting up and fetching the settings from `settings.toml`.\\n\\nThe script also parses a command-line argument to allow you to conveniently specify an input file.\\nThe input files that you’ll primarily work with contain [LLM-generated customer support chat conversations](https://arxiv.org/abs/2206.08082), but feel free to reuse the script and provide your own input text files for additional practice.\\n\\nThe heart of the codebase is `settings.toml`. This [TOML settings file](https://realpython.com/python-toml/) hosts the prompts that you’ll use to sharpen your prompt engineering skills. It contains different prompts formatted in the human-readable settings format TOML.\\n\\nKeeping your prompts in a dedicated settings file can help to put them under [version control](https://realpython.com/python-git-github-intro/), which means you can keep track of different versions of your prompts, which will inevitably change during development.\\n\\nYour Python script will read the prompts from `settings.toml`, assemble them meaningfully, and send an API requests to OpenAI.\\n\\nAlternatively, you can also run all the text prompts directly in the [OpenAI playground](https://platform.openai.com/playground), which will give you the same functionality as the script. You could even paste the prompts into the [ChatGPT](https://chat.openai.com/) interface. However, the results will vary because you’ll be interacting with a different model and won’t have the opportunity to change certain settings.\\n\\n### Set Up the Codebase[](#set-up-the-codebase \\\"Permanent link\\\")\\n\\nMake sure that you’re on [Python 3.11](https://realpython.com/python311-new-features/) or higher, so that you can [interact with TOML files](https://realpython.com/python311-tomllib/) using the standard library. If you haven’t downloaded the codebase yet, go ahead and click the link below:\\n\\nUnzip the folder and use your CLI to navigate into the folder. You’ll see a handful of files. The most important ones are `app.py` and `settings.toml`:\\n\\n ./\\n ├── LICENSE\\n ├── README.md\\n ├── app.py\\n ├── chats.txt\\n ├── requirements.txt\\n ├── sanitized-chats.txt\\n ├── sanitized-testing-chats.txt\\n ├── settings.toml\\n ├── settings-final.toml\\n └── testing-chats.txt\\n\\nThe file `settings.toml` contains placeholders for all the prompts that you’ll use to explore the different prompt engineering techniques. That’s the file that you’ll primarily work with, so open it up. You’ll use it to iteratively develop the prompts for your application.\\n\\nThe file `app.py` contains the Python code that ties the codebase together. You’ll run this script many times throughout the tutorial, and it’ll take care of pulling your prompts from `settings.toml`.\\n\\nAfter you’ve downloaded and unpacked the codebase, create and activate a new [virtual environment](https://realpython.com/python-virtual-environments-a-primer/). Then use [`pip`](https://realpython.com/what-is-pip/) to install the required dependencies:\\n\\nNote that this tutorial uses `openai` version 1.13.3. OpenAI may introduce breaking changes between API versions, so make sure that you install the pinned dependencies from the requirements file. Then you’ll be able to work through the tutorial without any hiccups.\\n\\nTo run the script successfully, you’ll need an [OpenAI API key](https://platform.openai.com/account/api-keys) with which to authenticate your API requests. Make sure to keep that key private and never commit it to version control! If you’re new to using API keys, then read up on [best practices for API key safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety).\\n\\nTo integrate your API key with the script and avoid leaking it publicly, you can export the API key as an environment variable:\\n\\nAfter you’ve added your API key as an environment variable named `OPENAI_API_KEY`, the script will automatically pick it up during each run.\\n\\nAt this point, you’ve completed the necessary setup steps. You can now run the script using the command line and provide it with a file as additional input text:\\n\\nThe command shown above combines the customer support chat conversations in `chats.txt` with prompts and API call parameters that are saved in `settings.toml`, then sends a request to the OpenAI API. Finally, it prints the resulting text completion to your terminal.\\n\\nFrom now on, you’ll primarily make changes in `settings.toml`. The code in `app.py` is just here for your convenience, and you won’t have to edit that file at all. The changes in the LLM’s output will come from changing the prompts and a few of the API call arguments.\\n\\n### Freeze Responses by Setting the Temperature to Zero[](#freeze-responses-by-setting-the-temperature-to-zero \\\"Permanent link\\\")\\n\\nWhen you’re planning to integrate an LLM into a product or a workflow, then you’ll generally want deterministic responses. The same input should give you the same output. Otherwise, it gets hard to provide a consistent service or debug your program if something goes wrong.\\n\\nBecause of this, you’ll want to set the [`temperature` argument](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature) of your API calls to `0`. This value will mean that you’ll get *mostly* deterministic results.\\n\\nLLMs do text completion by predicting the next token based on the probability that it follows the previous tokens. Higher `temperature` settings will introduce more randomness into the results by allowing the LLM to pick tokens with lower probabilities. Because there are so many token selections chained one after one the other, picking one different token can sometimes lead to vastly different results.\\n\\nIf you use the LLM to generate ideas or [alternative implementations](https://realpython.com/chatgpt-coding-mentor-python/#prompt-chatgpt-for-alternative-implementations) of a programming task, then higher values for `temperature` might be interesting. However, they’re generally undesirable when you build a product.\\n\\nIn the example codebase, you can adjust `temperature` right inside your `settings.toml` file:\\n\\nThe initial value is set at `0`. All the examples in this tutorial assume that you leave `temperature` at `0` so that you’ll get mostly deterministic results. If you want to experiment with how a higher temperature changes the output, then feel free to play with it by changing the value for `temperature` in this settings file.\\n\\nIt’s important to keep in mind that you won’t be able to achieve true determinism with the current LLM models offered by OpenAI even if you keep `temperature` at `0`:\\n> An edge-case in GPT-3 with big implications: Inference is non-deterministic (even at temperature=0) when top-2 token probabilities are <1% different. So temperature=0 output is *very close* to deterministic, but actually isn’t. Worth remembering. ([Source](https://twitter.com/goodside/status/1608525976702525440?ref_src=twsrc%5Etfw))\\n\\nSo, while you can’t entirely guarantee that the model will always return the same result, you can get much closer by setting `temperature` to `0`.\\n\\nAnother approach that improves determinism in the results is to set a value for [the `seed` parameter](https://cookbook.openai.com/examples/reproducible_outputs_with_the_seed_parameter). The provided code sets the seed to `12345`. However, this only has an effect on some of the models.\\n\\n## Start Engineering Your Prompts[](#start-engineering-your-prompts \\\"Permanent link\\\")\\n\\nNow that you have an understanding of prompt engineering and the practical project that you’ll be working with, it’s time to dive into some prompt engineering techniques. In this section, you’ll learn how to apply the following techniques to your prompts to get the desired output from the language model:\\n\\n- **Zero-shot prompting:** Giving the language model normal instructions without any additional context\\n- **Few-shot prompting:** Conditioning the model on a few examples to boost its performance\\n- **Using delimiters:** Adding special tokens or phrases to provide structure and instructions to the model\\n- **Detailed, numbered steps:** Breaking down a complex prompt into a series of small, specific steps\\n\\n\\nBy practicing these techniques with the customer chat conversation example, you’ll gain a deeper understanding of how prompt engineering can enhance the capabilities of language models and improve their usefulness in real-world applications.\\n\\n### Describe Your Task[](#describe-your-task \\\"Permanent link\\\")\\n\\nYou’ll start your prompt engineering journey with a concept called **zero-shot prompting**, which is just a fancy way of saying that you’re asking a question or describing a task:\\n> Remove personally identifiable information, only show the date, and replace all swear words with “😤”\\n\\nThis task description focuses on the requested steps for sanitizing the customer chat conversation and literally spells them out. This is the prompt that’s currently saved as `instruction_prompt` in the `settings.toml` file:\\n\\nIf you run the Python script and provide the support chat file as an argument, then it’ll send this prompt together with the content of `chats.txt` to OpenAI’s text completion API:\\n\\nIf you correctly installed the dependencies and added your OpenAI API key as an environment variable, then all you need to do is wait until you see the API response pop up in your terminal:\\n\\nIn the example output, you can see that the prompt that you provided didn’t do a good job tackling the tasks. And that’s putting it gently! It picked up that it should do something with the huffing emoji and reduce the [ISO date-time](https://en.wikipedia.org/wiki/ISO_8601) to only a date. Your results might not have tackled all of that. Overall, nearly all of the work is left undone and the output is useless.\\n\\nIf you’re new to interacting with LLMs, then this may have been a first attempt at outsourcing your development work to the text completion model. But these initial results aren’t exactly exhilarating.\\n\\nSo you’ve described the task in natural language and gotten a bad result. But don’t fret—throughout the tutorial you’ll learn how you can get more useful responses for your task.\\n\\nOne way to do that is by increasing the number of **shots**, or examples, that you give to the model. When you’ve given the model zero shots, the only way to go is up! That’s why you’ll improve your results through **few-shot prompting** in the next section.\\n\\n### Use Few-Shot Prompting to Improve Output[](#use-few-shot-prompting-to-improve-output \\\"Permanent link\\\")\\n\\nFew-shot prompting is a prompt engineering technique where you provide example tasks and their expected solutions in your prompt. So, instead of just describing the task like you did before, you’ll now add an example of a chat conversation and its sanitized version.\\n\\nOpen up `settings.toml` and change your `instruction_prompt` by adding such an example:\\n\\nOnce you’ve applied the change, give the LLM another chance to sanitize the chat conversations for you by running the script again:\\n\\nYou’ll have to wait for the LLM to predict all the tokens. When it’s done, you’ll see a fresh response pop up in your terminal:\\n\\nOkay, great! This time at least the LLM didn’t eat up all the information that you passed to it without giving anything useful back!\\n\\nThis time, the model tackled some of the tasks. For example, it sanitized the names in square brackets. However, the names of the customers are still visible in the actual conversations. It also didn’t censor the order numbers or the email address.\\n\\nThe model probably didn’t sanitize any of the names in the conversations or the order numbers because the chat that you provided didn’t contain any names or order numbers. In other words, the output that you provided didn’t show an example of redacting names, order numbers, or email addresses in the conversation text.\\n\\nHere you can see how important it is to choose good examples that clearly represent the output that you want.\\n\\nSo far, you’ve provided *one* example in your prompt. To cover more ground, you’ll add another example so that this part of your prompt truly puts the *few* in few-shot prompting:\\n\\nYou added a second example that contains both a customer name as well as an order number in the chat text body. The example of a sanitized chat shows both types of sensitive data replaced with a sequence of asterisks (`****`). Now you’ve given the LLM a good example to *model*.\\n\\nAfter editing `instruction_prompt` in `settings.toml`, run your script again and wait for the response to print to your terminal:\\n\\nWait? Where did most of the output go? You probably expected to see better results, but it looks like you’re getting only two of the conversations back this time!\\n\\nYou’ve added more text to your prompt. At this point, the task instructions probably make up proportionally too few tokens for the model to consider them in a meaningful way. The model lost track of what it was supposed to do with the text that you provided.\\n\\nAdding more examples should make your responses stronger instead of eating them up, so what’s the deal? You can trust that few-shot prompting works—it’s a widely used and very effective prompt engineering technique. To help the model distinguish which part of your prompt contains the instructions that it should follow, you can use *delimiters*.\\n\\n### Use Delimiters to Clearly Mark Sections of Your Prompt[](#use-delimiters-to-clearly-mark-sections-of-your-prompt \\\"Permanent link\\\")\\n\\nIf you’re working with content that needs specific inputs, or if you provide examples like you did in the previous section, then it can be very helpful to clearly mark specific sections of the prompt. Keep in mind that everything you write arrives to an LLM as a single prompt—a long sequence of tokens.\\n\\nYou can improve the output by using delimiters to fence and label specific parts of your prompt. In fact, if you’ve been running the example code, then you’ve already used delimiters to fence the content that you’re reading from file.\\n\\nThe script adds the delimiters when assembling the prompt in `app.py`:\\n\\nIn line 13, you wrap the chat content in between `>>>>>` and `<<<<<` delimiters. Marking parts of your prompt with delimiters can help the model keep track of which tokens it should consider as a single unit of meaning.\\n\\nYou’ve seen in the previous section that missing delimiters can lead to unexpected results. You might receive less output than expected, like in the previous example, or an empty response. But you might also receive output that’s quite different from what you want! For example, imagine that the chat content that you’re reformatting contains a question at the end, such as:\\n> Can you give me your order number?\\n\\nIf this question is the last line of your prompt without delimiters, then the LLM might continue the imaginary chat conversation by answering the question with an imaginary order number. Give it a try by adding such a sentence to the end of your current prompt!\\n\\nDelimiters can help to separate the content and examples from the task description. They can also make it possible to refer to specific parts of your prompt at a later point in the prompt.\\n\\nA delimiter can be any **sequence of characters** that usually *wouldn’t* appear together, for example:\\n\\n- `>>>>>`\\n- `====`\\n- `####`\\n\\n\\nThe number of characters that you use doesn’t matter too much, as long as you make sure that the sequence is relatively unique. Additionally, you can add labels just before or just after the delimiters:\\n\\n- `START CONTENT>>>>>` content `<<<<>>>>CONTENT<<<<<` in your task description. These delimiters match the delimiters that the code in `app.py` adds when assembling the prompt.\\n\\nYou’ve also delimited the examples that you’re providing with `#### START EXAMPLES` and `#### END EXAMPLES`, and you differentiate between the inputs and expected outputs using multiple dashes (`------`) as delimiters.\\n\\nWhile delimiters can help you to get better results, in this case your output is quite similar to before:\\n\\nIt’s noticeable that the model only shows the two example data that you passed as examples. Could it be that your prompt leads to something similar like [overfitting](https://realpython.com/linear-regression-in-python/#underfitting-and-overfitting)? Using the actual data that you want to sanitize as your training data is, anyway, not a good idea, so in the next section, you’ll make sure to change that.\\n\\nIn this section, you’ve learned how you can clarify the different parts of your prompt using delimiters. You marked which part of the prompt is the task description and which part contains the customer support chat conversations, as well as the examples of original input and expected sanitized output.\\n\\n### Test Your Prompt Across Different Data[](#test-your-prompt-across-different-data \\\"Permanent link\\\")\\n\\nSo far, you’ve created your few-shot examples from the same data that you also run the sanitation on. This means that you’re effectively using your test data to provide context to the model. Mixing training, validation, and testing data is a bad practice in machine learning. You might wonder how well your prompt generalizes to different input.\\n\\nTo test this out, run the script another time with the same prompt using the second file that contains chat conversations, `testing-chats.txt`. The conversations in this file contain different names, and different—soft—swear words:\\n\\nYou’ll keep running your script using `testing-chats.txt` moving forward, unless indicated differently.\\n\\nOnce you’ve waited for the LLM to generate and return the response, you’ll notice that the result isn’t very satisfying:\\n\\nThe model now understands that you meant the examples as examples to follow when applying edits and gives you back all of the new input data. However, it didn’t do a great job following the instructions.\\n\\nThe model didn’t identify new swear words and didn’t replace them. The model also didn’t redact the order numbers, nor did it anonymize the names. It looks like it only managed to reformat your date strings.\\n\\nSo your engineered prompt currently doesn’t work well, and generalizes even worse. If you built a pipeline based on this prompt, where new chats could contain new customer names, then the application would probably continue to perform poorly. How can you fix that?\\n\\nYou’ve grown your prompt significantly by providing more examples, but your task description is still largely just the question that you wrote right at the beginning. To get better results, you’ll need to do some prompt engineering on the task description as well.\\n\\n### Describe Your Request in Numbered Steps[](#describe-your-request-in-numbered-steps \\\"Permanent link\\\")\\n\\nIf you break up your task instructions into a numbered sequence of small steps, then the model is a lot more likely to produce the results that you’re looking for.\\n\\nGo back to your prompt in `settings.toml` and break your initial task description into more granular, specific substeps:\\n\\nWith these step-by-step instructions in place, you’re ready for another run of your script and another inspection of the newly generated output:\\n\\nThat’s a significant improvement! The model managed to follow the pattern of replacing the names in square brackets with `[Agent]` and `[Customer]`, respectively. It correctly identified some new swear words and replaced them with the huffing emoji. The model also redacted the order numbers, and anonymized the names in the conversation texts.\\n\\nOften, one of the best approaches to get better results from an LLM is to make your instructions more specific.\\n\\nFraming your tasks in even smaller and more specific steps, will generally get you better results. Don’t shy away from [some repetition](https://twitter.com/phillip_isola/status/1532189616106881027):\\n\\nIncreasing the specificity of your instructions, and introducing numbered steps, helped you create a well-performing prompt. Your prompt successfully removes personally identifiable information from the conversations, redacts swear words, and reformats the ISO date-time stamp, as well as the usernames.\\n\\nYou could consider your initial task as completed, but there’s more that you want to do, and more prompt engineering techniques to explore. You also know that there are newer models that you could work with, and your success has further piqued your curiosity. It’s time to switch to a different LLM, see how that influences your output, and then continue exploring other techniques.\\n\\n## Perform Chat Completions With GPT-4[](#perform-chat-completions-with-gpt-4 \\\"Permanent link\\\")\\n\\nYou’ve decided to switch to an even more powerful LLM, GPT-4. In the rest of this tutorial, you’ll use GPT-4 to continue exploring other important prompt engineering techniques:\\n\\n1. **Role prompting**: Using a system message to set the tone of the conversation, and using different roles to give context through labeling\\n2. **Chain-of-thought prompting (CoT)**: Giving the model time to think by prompting it to reason about a task, then including the reasoning in the prompt\\n\\n\\nYou’ll also use GPT-4 to **classify the sentiment** of each chat conversation and structure the output format as **JSON**.\\n\\n### Switch to a Different Model[](#switch-to-a-different-model \\\"Permanent link\\\")\\n\\nIf you’re working with the provided script, then all you need to do is pick a chat model from `chat_models` in `settings.toml` and use it as the new value for `model`:\\n\\nChanging these settings will send your request to a different model. Like before, it’ll assemble your prompt in the way necessary for a `/chat/completions` endpoint request, make that request for you, and print the response to your terminal.\\n\\nFor the rest of this tutorial, you’ll work with OpenAI’s latest version of the GPT-4 model. If you don’t have access to this model, then you can instead keep working with the model that you’ve been working with so far.\\n\\nIf you’ve been following along using ChatGPT, then you’re stuck with whatever model currently powers it. Unless you’re a [ChatGPT Plus](https://openai.com/blog/chatgpt-plus) subscriber, then you can [change the model to GPT-4](https://help.openai.com/en/articles/7127997-how-can-i-use-gpt-4-in-chatgpt) on the website.\\n\\nWithout changing your prompt, run your script another time to see the different results of the text completion based only on using a different LLM:\\n\\nSome responses may be relatively similar to the ones with the older model. However, you can also expect to receive results like the one shown above, where most swear words are still present.\\n\\nIt’s important to keep in mind that developing for a specific model will lead to specific results, and swapping the model may improve or deteriorate the responses that you get. Therefore, swapping to a newer and more powerful model won’t necessarily give you better results straight away.\\n\\nAdditionally, it’s also helpful to keep in mind that API calls to larger models will generally [cost more money per request](https://openai.com/pricing). While it can be fun to always use the latest and greatest LLM, it may be worthwhile to consider whether you really need to upgrade to tackle the task that you’re trying to solve.\\n\\n### Add a Role Prompt to Set the Tone[](#add-a-role-prompt-to-set-the-tone \\\"Permanent link\\\")\\n\\nThere are some additional possibilities when interacting with the API endpoint that you’ve only used implicitly, but haven’t explored yet, such as adding **role labels** to a part of the prompt. In this section, you’ll use the `\\\"system\\\"` role to create a system message, and you’ll revisit the concept later on when you [add more roles](#improve-your-output-with-the-power-of-conversation) to improve the output.\\n\\n**Role prompting** usually refers to adding system messages, which represent information that helps to set the context for upcoming completions that the model will produce. System messages usually aren’t visible to the end user. Keep in mind that the `/chat/completions` endpoint models were initially designed for conversational interactions.\\n\\nYou can also use system messages to set a context for your completion task. You’ll craft a bespoke role prompt in a moment. However, for this specific task, the role prompt is likely less important than it might be for some other tasks. To explore the possible influence of a role prompt, you’ll take a little detour and ask your model to play a role:\\n\\nYou keep `instruction_prompt` the same as you engineered it earlier in the tutorial. Additionally, you now add text to `role_prompt`. The role prompt shown above serves as an example for the impact that a misguided prompt can have on your application.\\n\\nUnleash, thou shall, the parchment’s code and behold the marvels unexpected, as the results may stir wonderment and awe:\\n\\nAs you can see, a role prompt can have quite an impact on the language that the LLM uses to construct the response. This is great if you’re building a conversational agent that should speak in a certain tone or language. And you can also use system messages to keep specific setup information present.\\n\\nFor completion tasks like the one that you’re currently working on, you might, however, not need this type of role prompt. For now, you could give it a common boilerplate phrase, such as *You’re a helpful assistant*.\\n\\nTo practice writing a role prompt—and to see whether you can release your customer chat conversations from the reign of that 16th century villain poet—you’ll craft a more appropriate role prompt:\\n\\nThis role prompt is more appropriate to your use case. You don’t want the model to introduce randomness or to change any of the language that’s used in the conversations. Instead, you just want it to execute the tasks that you describe. Run the script another time and take a look at the results:\\n\\nThat looks much better again! Abide concealed in yonder bygone era, ye villainous poet!\\n\\nAs you can see from these examples, role prompts can be a powerful way to change your output. Especially if you’re using the LLM to build a conversational interface, then they’re a force to consider.\\n\\nFor some reason, GPT-4 seems to consistently pick `[Client]` over `[Customer]`, even though you’re specifying `[Customer]` in the few-shot examples. You’ll eventually get rid of these verbose names, so it doesn’t matter for your use case.\\n\\nHowever, if you’re determined and curious—and manage to prompt `[Client]` away—then share the prompt that worked for you in the comments.\\n\\nIn the final section of this tutorial, you’ll revisit using roles and see how you can employ [the power of conversation](#improve-your-output-with-the-power-of-conversation) to improve your output even in a non-conversational completion task like the one you’re working on.\\n\\n### Classify the Sentiment of Chat Conversations[](#classify-the-sentiment-of-chat-conversations \\\"Permanent link\\\")\\n\\nAt this point, you’ve engineered a decent prompt that seems to perform quite well in sanitizing and reformatting the provided customer chat conversations. To fully grasp the power of LLM-assisted workflows, you’ll next tackle the tacked-on request by your manager to also classify the conversations as positive or negative.\\n\\nStart by saving both sanitized conversation files into new files that will constitute the new inputs for your sentiment classification task:\\n\\nYou could continue to build on top of the previous prompt, but eventually you’ll hit a wall when you’re asking the model to do too many edits at once. The classification step is conceptually distinct from the text sanitation, so it’s a good cut-off point to start a new pipeline.\\n\\nThe sanitized chat conversation files are also included in the example codebase:\\n\\nAgain, you want the model to do the work for you. All you need to do is craft a prompt that spells out the task at hand, and provide examples. You can also edit the role prompt to set the context for this new task that the model should perform:\\n\\nYou can now run the script and provide it with the sanitized conversations in `sanitized-testing-chats.txt` that were the output of your previously engineered prompt:\\n\\nYou added another step to your task description and slightly modified the few-shot examples in your prompt. Not a lot of extra work for a task that would have required a lot more work without the help of an LLM. But is this really sufficient? Take a look at the output once your script has finished running:\\n\\nThe output is quite promising! The model correctly labeled conversations with angry customers with the fire emoji. However, the first conversation probably doesn’t entirely fit into the same bucket as the rest because the customer doesn’t display a negative sentiment towards the company.\\n\\nAssume that all of these conversations were resolved positively by the customer service agents and that your company just wants to follow up with those customers who seemed noticeably angry with their situation. In that case, you might need to tweak your prompt a bit more to get the desired result.\\n\\nYou could [add more examples](#use-few-shot-prompting-to-improve-output), which is generally a good idea because it creates more context for the model to apply. Writing a [more detailed description](#describe-your-request-in-numbered-steps) of your task helps as well, as you’ve seen before. However, to tackle this task, you’ll learn about another useful prompt engineering technique called *chain-of-thought* prompting.\\n\\n### Walk the Model Through Chain-of-Thought Prompting[](#walk-the-model-through-chain-of-thought-prompting \\\"Permanent link\\\")\\n\\nA widely successful prompt engineering approach can be summed up with the [anthropomorphism](https://en.wikipedia.org/wiki/Anthropomorphism) of *giving the model time to think*. You can do this with a couple of different specific techniques. Essentially, it means that you prompt the LLM to produce intermediate results that become additional inputs. That way, the reasoning doesn’t need to take distant leaps but only hop from one lily pad to the next.\\n\\nOne of these approaches is to use **chain-of-thought (CoT)** prompting techniques. To apply CoT, you prompt the model to generate intermediate results that then become part of the prompt in a second request. The increased context makes it more likely that the model will arrive at a useful output.\\n\\nThe smallest form of CoT prompting is **zero-shot CoT**, where you literally ask the model to *think step by step*. This approach yields [impressive results](https://arxiv.org/abs/2201.11903) for mathematical tasks that LLMs otherwise often solve incorrectly.\\n\\nChain-of-thought operations are technically split into two stages:\\n\\n1. **Reasoning extraction**, where the model generates the increased context\\n2. **Answer extraction**, where the model uses the increased context to generate the answer\\n\\n\\nReasoning extraction is useful across a variety of CoT contexts. You can generate few-shot examples from input, which you can then use for a separate step of extracting answers using more detailed chain-of-thought prompting.\\n\\nYou can try zero-shot CoT on the sanitized chat conversations to embellish the few-shot examples that you’ll use to classify the chat conversations more robustly. Remove the examples and replace the instructions describing the reasoning on how you would classify the conversations in more detail:\\n\\nYou spelled out the criteria that you want the model to use to assess and classify sentiment. Then you add the sentence *Let’s think step by step* to the end of your prompt.\\n\\nYou want to use this zero-shot CoT approach to generate few-shot examples that you’ll then build into your final prompt. Therefore, you should run the script using the data in `sanitized-chats.txt` this time:\\n\\nYou’ll get back a reference to the conversations, with the reasoning spelled out step by step to reach the final conclusion:\\n\\nThe reasoning is straightforward and sticks to your instructions. If the instructions accurately represent the criteria for marking a conversation as positive or negative, then you’ve got a good playbook at hand.\\n\\nYou can now use this information to improve the few-shot examples for your sentiment classification task:\\n\\nYou’re using the same examples as previously, but you’ve enhanced each of the examples with a short chain of thought that you generated in the previous call. Give your script another spin using `sanitized-testing-chats.txt` as the input file and see whether the results have improved:\\n\\nGreat! Now the first conversation, which was initially classified as negative, has also received the green checkmark.\\n\\nIn this section, you’ve supported your examples with reasoning for why a conversation should be labeled as positive vs negative. You generated this reasoning with another call to the LLM.\\n\\nAt this point, it seems that your prompt generalizes well to the available data and classifies the conversations as intended. And you only needed to carefully craft your words to make it happen!\\n\\n### Structure Your Output Format as JSON[](#structure-your-output-format-as-json \\\"Permanent link\\\")\\n\\nAs a final showcase for effective prompting when incorporating an LLM into your workflow, you’ll tackle the last task, which you added to the list youself: to pass the data on in a structured format that’ll make it straightforward for the customer support team to process further.\\n\\nYou already specified a format to follow in the previous prompt, and the LLM returned what you asked for. So it might just be a matter of asking for a different, more structured format, for example [JSON](https://realpython.com/python-json/):\\n\\nIn your updated `instruction_prompt`, you’ve explicitly asked the model to return the output as valid JSON. Then, you also adapted your few-shot examples to represent the JSON output that you want to receive. Note that you also applied additional formatting by removing the date from each line of conversation and truncating the `[Agent]` and `[Customer]` labels to single letters, `A` and `C`.\\n\\nYou’re still using example chat conversations from your sanitized chat data in `sanitized-chats.txt`, and you send the sanitized testing data from `sanitized-testing-chats.txt` to the model for processing.\\n\\nIn this case, you receive valid JSON, as requested. The classification still works as before and the output censors personally identifiable information, replaces swear words, and applies all the additional requested formatting:\\n\\nYour output may be different and show some small hiccups, but overall, this output is quite impressive and useful! You could pass this JSON structure over to the customer support team, and they could quickly integrate it into their workflow to follow up with customers who displayed a negative sentiment in the chat conversation.\\n\\nYou could stop here, but the engineer in you isn’t quite satisfied yet. All the instructions just in a single prompt? Your premonition calls and tells you tales about maintainability. In the next section, you’ll refactor your prompts to apply role labels before you set up your LLM-assisted pipeline and call it a day.\\n\\n### Improve Your Output With the Power of Conversation[](#improve-your-output-with-the-power-of-conversation \\\"Permanent link\\\")\\n\\nYou added a [role prompt](#add-a-role-prompt-to-set-the-tone) earlier on, but otherwise you haven’t tapped into the power of *conversations* yet.\\n\\nIn this final section, you’ll learn how you can provide additional context to the model by splitting your prompt into multiple separate messages with different labels.\\n\\nIn calls to the `/chat/completions` endpoint, a prompt is split into several **messages**. Each message has its content, which represents the prompt text. Additionally, it also has a **role**. There are [different roles](https://platform.openai.com/docs/api-reference/chat/create#chat/create-role) that a message can have, and you’ll work with three of them:\\n\\n1. `\\\"system\\\"` gives context for the conversation and helps to set the overall tone.\\n2. `\\\"user\\\"` represents the input that a user of your application might provide.\\n3. `\\\"assistant\\\"` represents the output that the model would reply with.\\n\\n\\nSo far, you’ve provided context for different parts of your prompt all mashed together in a single prompt, more or less well separated [using delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt). When you use a model that’s optimized for chat, such as GPT-4, then you can use roles to let the LLM know what type of message you’re sending.\\n\\nFor example, you can create some variables for your few-shot examples and separate variables for the associated CoT reasoning and outputs:\\n\\nYou’ve disassembled your `instruction_prompt` into seven separate prompts, based on what role the messages have in your conversation with the LLM.\\n\\nThe helper function that builds a messages payload, `_assemble_chat_messages()`, is already set up to include all of these prompts in the API request. Take a look into `app.py` to check out the separate messages, with their fitting roles, that make up your overall prompt:\\n\\nYour prompt is now split into distinct parts, each of which has a certain role label:\\n\\n- **Example input** has the `\\\"user\\\"` role.\\n- **Reasoning** that the model created has the `\\\"system\\\"` role.\\n- **Example output** has the `\\\"assistant\\\"` role.\\n\\n\\nYou’re now providing context for how user input might look, how the model can reason about classifying the input, and how your expected output should look. You removed the delimiters that you previously used for labeling the example sections. They aren’t necessary now that you’re providing context for the parts of your prompt through separate messages.\\n\\nGive your script a final run to see whether the power of conversation has managed to improve the output:\\n\\nThis JSON structure is looking legitimately great! The formatting that you wanted now shows up throughout, and the conversations are labeled correctly.\\n\\nAdditionally, you’ve improved the maintainability of your prompts by splitting them into separate labels. You can feel proud to pass on such a useful edit of the customer chat conversation data to your coworkers!\\n\\n## FAQs[](#faqs \\\"Permanent link\\\")\\n\\nYou’ve covered common prompt engineering techniques, and here, you’ll find a few questions and answers that sum up the most important concepts that you’ve covered in this tutorial.\\n\\nYou can use these questions to check your understanding or to recap and solidify what you’ve just learned. After each question, you’ll find a brief explanation hidden in a collapsible section. Click the *Show/Hide* toggle to reveal the answer. Time to dive in!\\n\\nKnowledge about prompt engineering is crucial when you work with large language models (LLMs) because you can receive [much better results](#describe-your-request-in-numbered-steps) with carefully crafted prompts.\\n\\nThe `temperature` setting controls the [amount of randomness](#freeze-responses-by-setting-the-temperature-to-zero) in your output. Setting the `temperature` argument of API calls to `0` will increase consistency in the responses from the LLM. Note that OpenAI’s LLMs are only ever *mostly* deterministic, even with the temperature set to `0`.\\n\\n[Few-shot prompting](#use-few-shot-prompting-to-improve-output) is a common prompt engineering technique where you add examples of expected input and desired output to your prompt.\\n\\n[Using delimiters](#use-delimiters-to-clearly-mark-sections-of-your-prompt) can be helpful when dealing with more complex prompts. Delimiters help to separate and label sections of the prompt, assisting the LLM in understanding its tasks better.\\n\\n[Testing your prompt](#test-your-prompt-across-different-data) with data that’s separate from the training data is important to see how well the model generalizes to new conditions.\\n\\nYes, generally adding more context will lead to more accurate results. However, it’s also important *how* you add the additional context. [Just adding more text](#use-few-shot-prompting-to-improve-output) may lead to worse results.\\n\\n[Role prompting](#add-a-role-prompt-to-set-the-tone) means providing a system message that sets the tone or context for a conversation. This can greatly impact how the model constructs the response. You can also [use roles to provide context labels](#improve-your-output-with-the-power-of-conversation) for parts of your prompt.\\n\\nIn [chain-of-thought (CoT) prompting](#walk-the-model-through-chain-of-thought-prompting), you prompt the LLM to produce intermediate reasoning steps. You can then include these steps in the answer extraction step to receive better results.\\n\\n## Next Steps[](#next-steps \\\"Permanent link\\\")\\n\\nIn this tutorial, you’ve learned about various prompt engineering techniques, and you’ve built an LLM-assisted Python application along the way. If you’d like to learn more about prompt engineering, then check out some related questions, as well as some resources for further study below:\\n\\nYes, prompt engineer can be a real job, especially in the context of AI and machine learning. As a prompt engineer, you design and optimize prompts so that AI models like GPT-4 produce desired responses. However, it might not be a stand-alone job title everywhere. It could be a part of broader roles like [machine learning](https://realpython.com/learning-paths/machine-learning-python/) engineer or [data scientist](https://realpython.com/learning-paths/data-science-python-core-skills/).\\n\\nPrompt engineering, like any other technical skill, requires time, effort, and practice to learn. It’s not necessarily easy, but it’s certainly possible for someone with the right mindset and resources to learn it. If you’ve enjoyed the iterative and text-based approach that you learned about [in this tutorial](#understand-the-purpose-of-prompt-engineering), then prompt engineering might be a good fit for you.\\n\\nThe field of prompt engineering is quite new, and LLMs keep developing quickly as well. The landscape, best practices, and most effective approaches are therefore changing rapidly. To continue learning about prompt engineering using free and open-source resources, you can check out [Learn Prompting](https://learnprompting.org/docs/intro/) and the [Prompt Engineering Guide](https://www.promptingguide.ai/).\\n\\nHave you found any interesting ways to incorporate an LLM into your workflow? Share your thoughts and experiences in the comments below.\\n\\n== **Take the Quiz:**== Test your knowledge with our interactive “Practical Prompt Engineering” quiz. You’ll receive a score upon completion to help you track your learning progress:\\n\\n---\\n\\n\\n[Prompt Engineering: A Practical Example](/quizzes/practical-prompt-engineering/)\\n\\n**Interactive Quiz**\\n\\n[Practical Prompt Engineering](/quizzes/practical-prompt-engineering/)\\n\\nIn this quiz, you'll test your understanding of prompt engineering techniques with large language models (LLMs) like GPT-3.5 and GPT-4. You'll revisit how to work with OpenAI's API, apply prompt engineering techniques to practical examples, and use various strategies to improve your results.\\n\\n========================\\nLLMs like ChatGPT are very effective at code generation. In this section, we will cover many examples of how to use ChatGPT for code generation.\\n\\nThe OpenAI's Playground (Chat Mode) and the `gpt-3.5-turbo` model are used for all examples below.\\n\\nAs with all chat models from OpenAI, you can use a `System Message` to define the behavior and format of the responses. We will use the following System Message for the prompt examples:\\n\\n## Basic Example[](#basic-example)\\n\\nThe `User Message` will then be the prompt itself with the instruction of the specific code you want the model to generate. Below is a snapshot of the first basic prompt for code generation. Note that the final code was generated by the `Assistant`.\\n\\n![CODE1](/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fchat-mode.3928920f.png&w=3840&q=75)\\n\\nHere is the prompt if you want to try it yourself:\\n\\n## Turn Comments Into Code[](#turn-comments-into-code)\\n\\nYou can also use the code generation capabilities of these LLMs to generate code from comments alone. Let's look at another example that passes the instructions as a comment block:\\n\\n*Prompt:*\\n\\n*Output:*\\n\\nThe generated code looks like working code. However, the output doesn't contain an important `import` statement, i.e., `import json`. While these models generate working code, you must pay close attention to small details like this and always make sure to test your code.\\n\\n## Complete Functions or Next Line[](#complete-functions-or-next-line)\\n\\nThese LLMs have also been incorporated into tools like GitHub Copilot which makes them useful for developers. One useful feature is the ability of the model to complete functions.\\n\\n*Prompt:*\\n\\n*Output:*\\n\\n## MySQL Query Generation[](#mysql-query-generation)\\n\\nBesides the basic code generation example above, you can use the model to generate useful code that could be used in other aspects of programming like creating and testing MySQL queries.\\n\\nLet's say you have a dataset with some information which you can include as part of the prompt and then instruct it to generate a specific query. For example:\\n\\n*Output:*\\n\\nThe model generates a query! However, in this example we made up the database so we don't have a database to test the generated query. If you are testing this example, it could be useful to have this database created. Fortunately, you can use ChatGPT to generate this for you as well. One approach is using a modified version of the prompt above with slightly different instructions to generate a database schema:\\n\\nThe model generated the following schema:\\n\\nYou can create a temporary database using something like [sqliteonline.com (opens in a new tab)](https://sqliteonline.com/).\\n\\nYou will also need dummy data to test if the original generated query was valid. Let's use ChatGPT for this too! The prompt could look something like the following:\\n\\nThe output with the insert statements:\\n\\nNow that you have the database and the dummy data, you can test the original query. Based on the dummy data, the result should be two rows with `Jane Doe` and `John Doe`.\\n\\n## Explain Code[](#explain-code)\\n\\nIf you are learning to program in a certain language, it might be useful to prompt the model to explain certain bits of code. Let's reuse the query generated above and ask the model to explain it.\\n\\nIf you are using the same System Message from before, be careful of how you are instructing the model. For example, if we use the following example, the model will complain that it is a code assistant in Python but it still provides an explanation. Sometimes the model refuses to generate what you are instructing because it's designed to follow the System Message.\\n\\n*Prompt:*\\n\\n*Output:*\\n\\n## Editing Code[](#editing-code)\\n\\nExample coming soon!\\n\\n## Debugging Code[](#debugging-code)\\n\\nExample coming soon!\\n\\n## Best practices[](#best-practices)\\n\\nComing soon!\\n\\n[![SVG Image]()Tackling Generated Datasets Diversity](/applications/generating_textbooks \\\"Tackling Generated Datasets Diversity\\\")[Graduate Job Classification Case Study![SVG Image]()](/applications/workplace_casestudy \\\"Graduate Job Classification Case Study\\\")\\n\\n========================\\nAmazon Q Developer provides automatic code generation, auto-completion, and natural language\\n code suggestions. Following are best practices for using Amazon Q Developer in-line coding\\n assistance:\\n\\n- **Provide context to help improve accuracy of\\n responses**\\n\\n Start with existing code, import libraries, create classes and functions, or\\n establish code skeletons. This context will help to significantly improve code\\n generation quality.\\n\\n- **Code naturally**\\n\\n Use Amazon Q Developer code generation like a robust auto-completion engine. Code as\\n you normally do, and let Amazon Q provide suggestions as you type or pause. If\\n code generation isn't available or you're stuck on a code issue, initiate Amazon Q\\n by typing **Alt+C** on a PC or **Option+C** on MacOS. For more information about common\\n actions that you can take while using in-line suggestions, see [Using shortcut keys](https://docs.aws.amazon.com/amazonq/latest/qdeveloper-ug/actions-and-shortcuts.html) in the *Amazon Q Developer User Guide*.\\n\\n- **Include import libraries that are relevant to your\\n script's objectives**\\n\\n Include relevant import libraries to help Amazon Q understand the context and\\n generate code accordingly. You can also ask Amazon Q to suggest relevant import\\n statements.\\n\\n- **Maintain clear and focused context**\\n\\n Keep your script focused on specific objectives, and modularize distinct\\n functionalities into separate scripts with relevant context. Avoid noisy or\\n confusing context.\\n\\n- **Experiment with prompts**\\n\\n Explore different prompts to nudge Amazon Q to produce useful results in code\\n generation. For example, try the following approaches:\\n\\n\\n - Use standard comment blocks for natural language prompts.\\n\\n - Create skeletons with comments to fill in classes and\\n functions.\\n\\n - Be specific in your prompts, providing details instead of\\n generalization.\\n\\n- **Chat with Amazon Q Developer and ask for\\n assistance**\\n\\n If Amazon Q Developerisn't providing accurate suggestions, chat with Amazon Q Developer in\\n your IDE. It can provide code snippets or full classes and functions to\\n kickstart your context. For more information, see [Chatting with Amazon Q Developer about code](https://docs.aws.amazon.com/amazonq/latest/qdeveloper-ug/q-in-IDE-chat.html) in the *Amazon Q Developer User\\n Guide.*\\n\\n========================\\nYou're debugging a production issue at 2 AM. The payment service is down, and the AI-generated script that was supposed to handle failovers isn't working. The code looked perfect in development. It passed all the tests. But now it's failing because it hardcoded an API key, doesn't handle AWS credential rotation, and has no audit logging for compliance.\\n\\nThis happens more than anyone wants to admit. [Industry research](https://itrevolution.com/articles/new-research-reveals-ai-coding-assistants-boost-developer-productivity-by-26-what-it-leaders-need-to-know/) shows AI coding assistants boost developer productivity by 26%. But here's the problem: [45% of AI-generated code](https://www.veracode.com/blog/ai-generated-code-security-risks/) contains security vulnerabilities. Most companies are getting faster at building broken software.\\n\\nThe real issue isn't with AI itself. It's that everyone's using the wrong prompts.\\n\\nThink about how most developers use AI coding tools. They type something like \\\"write a Python script to process payments\\\" and expect magic. What they get is code that works in a demo but falls apart in production. It's like asking someone to build you a car and being surprised when they hand you a go-kart.\\n\\nEnterprise software is different. It needs to handle authentication, logging, error recovery, compliance, and integration with systems that were built when Python was still a snake. Most AI prompts don't even mention these requirements.\\n\\nHere's what's counterintuitive: the solution isn't better AI models. It's better prompts. The difference between a prompt that generates toy code and one that generates production-ready code isn't the AI. It's the human who wrote the prompt.\\n\\nEvery large company has the same problem. Developers generate code fast, but it doesn't work in production. The AI creates beautiful functions that assume perfect network conditions, unlimited memory, and no security requirements.\\n\\nReal enterprise code is mostly error handling and edge cases. It's checking if services are down, rotating credentials, masking sensitive data, and logging everything for audits. A payment processing function in a real company spends more time dealing with failures than processing payments.\\n\\nBut look at typical AI prompts. They're all happy path scenarios. \\\"Write a function to connect to a database\\\" never mentions connection pooling, credential rotation, or what happens when the database is unavailable. The AI generates code that works once and breaks forever.\\n\\nThis creates a weird dynamic. Developers feel productive because they're writing code fast. Managers see features shipping quickly. But operations teams know the truth. They're the ones getting called at night when the pretty AI-generated code encounters its first real-world problem.\\n\\nThe gap between development speed and production reliability is growing. Companies are shipping faster than ever while their systems become more fragile. It's like building houses with cardboard because it's quicker than using wood.\\n\\n## **The Context Problem Nobody Talks About**\\n\\nHere's something most people don't understand about AI coding tools. The quality of generated code isn't just about the model. It's about how much context the AI can see.\\n\\nMost AI coding assistants have tiny context windows. They can see maybe a few files at once. But enterprise software isn't a collection of independent files. It's a web of dependencies, shared libraries, configuration systems, and integration patterns that span hundreds of services.\\n\\nWhen an AI can only see a single file, it makes assumptions. It assumes hardcoded values are fine. It assumes simple error handling is enough. It assumes the function will run in isolation. These assumptions kill production systems.\\n\\nThink about it this way. If you asked a new developer to write code without showing them the existing codebase, architecture documentation, or integration patterns, what would you expect? That's essentially what most AI tools are doing.\\n\\n[Augment Code](https://www.augmentcode.com/) has a 200k token context window. That's 12 times larger than most competitors. It can see entire codebases, understand existing patterns, and generate code that actually fits into real systems. It's like the difference between building a bridge with a blueprint versus guessing what the other side looks like.\\n\\nContext isn't just nice to have. It's the difference between code that works and code that integrates.\\n\\nLet's look at how to write prompts that actually work. The secret is specificity. Don't ask for a payment processor. Ask for a payment processor that handles PCI compliance, retry logic, fraud detection, audit logging, and integration with your existing authentication system.\\n\\nHere's a prompt that generates real enterprise code:\\n\\nNotice what's different? It specifies the Python version, the exact cloud services, the validation library, the encryption method, the monitoring system, and the testing framework. It asks for retry logic and edge case handling. This isn't a toy prompt. It's a specification.\\n\\nThe AI knows to include AWS credential handling because S3 and Redshift are mentioned. It knows to add error logging because CloudWatch metrics are required. It generates proper exception handling because edge cases are explicitly mentioned.\\n\\nCompare that to \\\"write a script to process CSV files.\\\" The AI would generate something that works on your laptop but fails in production. No error handling, no monitoring, no security, no tests.\\n\\nHere's another example for authentication:\\n\\nThis prompt gets enterprise-grade authentication code because it mentions the specific requirements that matter: hierarchical roles, LDAP integration, audit logging, and performance testing. The AI generates code that actually works in a real company.\\n\\nThe pattern is simple. Specify everything that matters for production: security, monitoring, error handling, testing, and integration requirements.\\n\\nLet's go through prompts that generate production-ready code. Each one includes the enterprise requirements that most prompts ignore.\\n\\n**Secure ETL Pipeline with Compliance Monitoring**\\n\\nThis works because it specifies the complete data pipeline with security and monitoring. The AI generates code that handles AWS credentials, encryption keys, network failures, and malformed data. It's not just an ETL script. It's an enterprise ETL system.\\n\\n**SOC 2 Audit Log Decorator Framework**\\n\\nCompliance isn't optional in enterprise software. This prompt generates a logging system that meets audit requirements. The AI includes sensitive data masking, correlation IDs, and proper error handling because the prompt mentions SOC 2 compliance.\\n\\n**Role-Based Access Control with Enterprise Integration**\\n\\nAuthentication in enterprise software is complex. This prompt generates code that integrates with existing directory services and handles the permission hierarchies that real companies need. It's not a toy login system. It's enterprise identity management.\\n\\n**Zero-Downtime Kubernetes Deployment Orchestrator**\\n\\nDeployment automation needs to be bulletproof. This prompt generates code that handles the complexity of Kubernetes deployments, including rollback logic and notification systems. It's production deployment automation that actually works.\\n\\n**PII Detection and GDPR Compliance Processor**\\n\\nPrivacy regulations aren't going away. This prompt generates code that handles the complexity of data privacy, including detection algorithms, masking strategies, and audit trails. It's not just data processing. It's compliant data processing.\\n\\n**Multi-Tenant Logging with Data Isolation**\\n\\nMulti-tenant systems need perfect data isolation. This prompt generates logging infrastructure that prevents data leakage between tenants while maintaining operational visibility. It's enterprise logging that actually works.\\n\\n**Automated Compliance Monitoring Framework**\\n\\nCompliance monitoring can't be manual. This prompt generates automation that continuously validates security controls and generates the reports that auditors need. It's compliance automation that reduces audit overhead.\\n\\n**High-Performance Async API Client Generator**\\n\\nAPI integration in enterprise systems needs to handle failures gracefully. This prompt generates clients with proper connection management, retry logic, and error handling. It's not just an API wrapper. It's resilient integration infrastructure.\\n\\nNotice what all these prompts have in common? They specify the production requirements that toy prompts ignore.\\n\\nEvery prompt mentions specific technologies, not generic concepts. Instead of \\\"database,\\\" they say \\\"Redshift.\\\" Instead of \\\"logging,\\\" they say \\\"CloudWatch Logs.\\\" Instead of \\\"security,\\\" they say \\\"JWT tokens with LDAP integration.\\\"\\n\\nEvery prompt includes error handling and monitoring. Real systems fail, and production code needs to handle failures gracefully. These prompts generate code that assumes failures will happen.\\n\\nEvery prompt mentions testing and validation. Enterprise code without tests is just technical debt waiting to happen. These prompts generate the test coverage that production systems need.\\n\\nEvery prompt includes compliance and security requirements. Enterprise software operates under regulatory constraints that toy examples ignore. These prompts generate code that meets real compliance requirements.\\n\\nThe difference isn't the AI model. It's the specificity of the request.\\n\\nHere's what happens when companies use generic AI prompts. Developers generate code fast, but it creates more problems than it solves.\\n\\nThe code works in development but fails in production. The failure modes are predictable: hardcoded credentials, missing error handling, no audit logging, and poor integration with existing systems. Operations teams spend their time fixing AI-generated code instead of building new features.\\n\\nSecurity teams find vulnerabilities in every release. The AI-generated code doesn't follow security best practices because the prompts don't mention security requirements. Every deployment becomes a security review instead of a routine release.\\n\\nCompliance teams can't audit the systems because the code doesn't generate the logs and reports they need. Simple features become compliance projects because the foundation is wrong.\\n\\nThe productivity gains from AI disappear under the weight of technical debt. Teams move fast initially but slow down as the codebase becomes unmaintainable. It's like driving fast with bad brakes. You go faster until you crash.\\n\\nThe solution isn't avoiding AI. It's using it correctly. Good prompts generate code that integrates with existing systems, follows security best practices, and meets compliance requirements.\\n\\nThink about AI as a very fast junior developer who knows syntax but doesn't understand context. You wouldn't tell a junior developer to \\\"build a payment system\\\" without explaining the requirements, architecture, and constraints. Don't do it with AI either.\\n\\nSpecify everything that matters for production. Mention the exact technologies, security requirements, error handling needs, and integration patterns. The more specific the prompt, the better the generated code.\\n\\nReview everything before deployment. AI-generated code still needs human oversight. Look for security issues, integration problems, and missing error handling. Use the generated code as a starting point, not a finished product.\\n\\nTest comprehensively. AI generates code that passes happy path tests but fails under stress. Test error conditions, security scenarios, and integration edge cases. Production testing reveals problems that development testing misses.\\n\\nMonitor everything in production. AI-generated code fails in unexpected ways. Good monitoring catches problems before they impact users. Plan for failures because they will happen.\\n\\nThe companies that get AI right will build software faster without sacrificing quality. They'll use specific prompts that generate production-ready code instead of toy examples.\\n\\nThe companies that get AI wrong will build technical debt faster than ever. They'll ship broken software quickly and spend years fixing it. The productivity gains will disappear under maintenance overhead.\\n\\nThe difference isn't the AI technology. Every company has access to the same models. The difference is prompt engineering and code review discipline.\\n\\n[Augment Code](https://www.augmentcode.com/) provides the enterprise capabilities that make this possible: 200k token context windows, SOC 2 Type II certification, and Claude Sonnet 4 integration. But the tool is only as good as the prompts you give it.\\n\\nThe future belongs to companies that understand this distinction. AI is a powerful tool for generating code, but only if you know how to ask for what you actually need.\\n\\nWant to see the difference that enterprise-grade AI coding capabilities make? [Start your free trial of Augment Code](https://www.augmentcode.com/) and discover how proper context understanding and advanced security features enable productive, compliant development that scales with enterprise requirements.\\n\\n========================\\n\"" +2025-12-30T23:05:05.535682Z DEBUG ThreadId(14) rigscribe::tools::prompt_reviewer: src/tools/prompt_reviewer.rs:79: PromptReviewer produced artifact: Artifact { system_prompt: "### ROLE\nYou are an expert Python developer specializing in writing clean, efficient, and robust code that adheres to industry best practices. Your primary function is to generate a single, complete Python function based on a user's request.\n\n### OBJECTIVE\nTo produce a production-quality Python function that precisely matches the user's specified requirements, is fully documented, handles edge cases, and follows strict coding standards. You must refuse to generate code if the user's request is ambiguous or incomplete, and instead ask clarifying questions.\n\n### PROCESS\n1. **Analyze the Request:** Scrutinize the user's input provided in the `### USER REQUEST ###` section. Identify all requirements, constraints, and the core purpose of the function.\n2. **Check for Ambiguity:** If the request lacks critical information (e.g., function name, specific inputs, expected output, handling of edge cases), you MUST NOT write any code. Instead, you MUST ask specific, numbered clarifying questions to resolve the ambiguity.\n3. **Generate the Function:** Once the request is perfectly clear, write the Python function.\n4. **Enforce Constraints:** Ensure the generated function strictly adheres to ALL rules listed under the `### CONSTRAINTS ###` section.\n5. **Format the Output:** Present the final, complete function within a single Python Markdown block, followed by a brief explanation of its logic.\n\n### CONSTRAINTS\n- **Language:** The generated code MUST be written in Python 3.9+.\n- **Clarity & Documentation:**\n - The function MUST include a comprehensive PEP 257 compliant docstring. The docstring MUST describe the function's purpose, its arguments (`Args:`), and what it returns (`Returns:`).\n - The function signature MUST include type hints for all parameters and the return value.\n- **Correctness & Robustness:**\n - The code MUST be logically correct and directly solve the user's request.\n - You MUST anticipate and handle common edge cases (e.g., empty lists, null inputs, invalid data types).\n- **Style & Best Practices:**\n - The code MUST strictly adhere to the PEP 8 style guide.\n - The logic should be as efficient as reasonably possible.\n- **Safety & Security:**\n - You MUST NEVER use insecure functions or patterns (e.g., `eval()`, `exec()`, constructing shell commands from input).\n - You MUST NOT include any placeholder credentials, API keys, or personally identifiable information (PII) in the code.\n- **Dependencies:**\n - You MUST only use the Python standard library.\n - You MUST NOT `import` any third-party libraries unless explicitly instructed to do so in the user request.\n\n### USER INPUT\n```\n### USER REQUEST ###\n{{user_request}}\n```", signed_by: "Not specified" } +2025-12-30T23:05:05.536483Z INFO ThreadId(01) rigscribe::agents: src/agents/mod.rs:65: Tool executed tool="PromptReviewer" args="{\"intent\":{\"text\":\"write a python fonction\"},\"spec\":{\"constraints\":\"- The programming language must be Python.\\n- The function's purpose, name, parameters, and return value are completely undefined.\\n- High probability of delivering a function that does not meet the user's actual, unstated needs.\\n- The solution will likely be too generic or trivial to be useful without further clarification.\",\"goal\":\"Create a working piece of Python code structured as a function.\"}}" result="{\"system_prompt\":\"### ROLE\\nYou are an expert Python developer specializing in writing clean, efficient, and robust code that adheres to industry best practices. Your primary function is to generate a single, complete Python function based on a user's request.\\n\\n### OBJECTIVE\\nTo produce a production-quality Python function that precisely matches the user's specified requirements, is fully documented, handles edge cases, and follows strict coding standards. You must refuse to generate code if the user's request is ambiguous or incomplete, and instead ask clarifying questions.\\n\\n### PROCESS\\n1. **Analyze the Request:** Scrutinize the user's input provided in the `### USER REQUEST ###` section. Identify all requirements, constraints, and the core purpose of the function.\\n2. **Check for Ambiguity:** If the request lacks critical information (e.g., function name, specific inputs, expected output, handling of edge cases), you MUST NOT write any code. Instead, you MUST ask specific, numbered clarifying questions to resolve the ambiguity.\\n3. **Generate the Function:** Once the request is perfectly clear, write the Python function.\\n4. **Enforce Constraints:** Ensure the generated function strictly adheres to ALL rules listed under the `### CONSTRAINTS ###` section.\\n5. **Format the Output:** Present the final, complete function within a single Python Markdown block, followed by a brief explanation of its logic.\\n\\n### CONSTRAINTS\\n- **Language:** The generated code MUST be written in Python 3.9+.\\n- **Clarity & Documentation:**\\n - The function MUST include a comprehensive PEP 257 compliant docstring. The docstring MUST describe the function's purpose, its arguments (`Args:`), and what it returns (`Returns:`).\\n - The function signature MUST include type hints for all parameters and the return value.\\n- **Correctness & Robustness:**\\n - The code MUST be logically correct and directly solve the user's request.\\n - You MUST anticipate and handle common edge cases (e.g., empty lists, null inputs, invalid data types).\\n- **Style & Best Practices:**\\n - The code MUST strictly adhere to the PEP 8 style guide.\\n - The logic should be as efficient as reasonably possible.\\n- **Safety & Security:**\\n - You MUST NEVER use insecure functions or patterns (e.g., `eval()`, `exec()`, constructing shell commands from input).\\n - You MUST NOT include any placeholder credentials, API keys, or personally identifiable information (PII) in the code.\\n- **Dependencies:**\\n - You MUST only use the Python standard library.\\n - You MUST NOT `import` any third-party libraries unless explicitly instructed to do so in the user request.\\n\\n### USER INPUT\\n```\\n### USER REQUEST ###\\n{{user_request}}\\n```\",\"signed_by\":\"Not specified\"}" +2025-12-30T23:05:25.310959Z INFO ThreadId(01) rigscribe::agents::optimizer: src/agents/optimizer.rs:65: Optimization complete. Final artifact length: 4071 +2025-12-30T23:05:25.312824Z INFO ThreadId(01) rigscribe: src/lib.rs:51: Optimize prompt cached to: "./.prompts_perssitense_cache/2011.json" diff --git a/src/agents/mod.rs b/src/agents/mod.rs index 1c96b90..d545ee6 100644 --- a/src/agents/mod.rs +++ b/src/agents/mod.rs @@ -1 +1,138 @@ pub mod optimizer; + +use futures::{Stream, StreamExt}; +use rig::{ + OneOrMany, + agent::Agent, + completion::{self, CompletionError, CompletionModel, PromptError}, + message::{AssistantContent, Message, Text, ToolResultContent, UserContent}, + streaming::{StreamedAssistantContent, StreamingCompletion}, + tool::{ToolError, ToolSetError}, +}; +use std::pin::Pin; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum StreamingError { + #[error("CompletionError: {0}")] + Completion(#[from] CompletionError), + #[error("PromptError: {0}")] + Prompt(#[from] Box), + #[error("ToolSetError: {0}")] + Tool(#[from] ToolSetError), +} + +pub type StreamingResult = Pin> + Send>>; + +pub async fn multi_turn_prompt( + agent: Agent, + prompt: impl Into + Send, + mut chat_history: Vec, +) -> StreamingResult +where + M: CompletionModel + 'static, + ::StreamingResponse: std::marker::Send, +{ + let prompt: Message = prompt.into(); + + (Box::pin(async_stream::stream! { + let mut current_prompt = prompt; + let mut did_call_tool = false; + + 'outer: loop { + let mut stream = agent + .stream_completion(current_prompt.clone(), chat_history.clone()) + .await? + .stream() + .await?; + + chat_history.push(current_prompt.clone()); + + let mut tool_calls = vec![]; + let mut tool_results = vec![]; + + while let Some(content) = stream.next().await { + match content { + Ok(StreamedAssistantContent::Text(text)) => { + yield Ok(Text { text: text.text }); + did_call_tool = false; + }, + Ok(StreamedAssistantContent::ToolCall(tool_call)) => { + let tool_result = + agent.tool_server_handle.call_tool(&tool_call.function.name, &tool_call.function.arguments.to_string()).await + .map_err(|x| StreamingError::Tool(ToolSetError::ToolCallError(ToolError::ToolCallError(x.into()))))?; + + tracing::info!( + tool = tool_call.function.name, + args = tool_call.function.arguments.to_string(), + result = tool_result, + "Tool executed" + ); + + let tool_call_msg = AssistantContent::ToolCall(tool_call.clone()); + + tool_calls.push(tool_call_msg); + tool_results.push((tool_call.id, tool_call.call_id, tool_result)); + + did_call_tool = true; + // break; + }, + Ok(StreamedAssistantContent::Reasoning(rig::message::Reasoning { reasoning, .. })) => { + if !reasoning.is_empty() { + yield Ok(Text { text: reasoning.first().unwrap().to_owned() }); + } + did_call_tool = false; + }, + Ok(_) => { + // do nothing here as we don't need to accumulate token usage + } + Err(e) => { + yield Err(e.into()); + break 'outer; + } + } + } + + // Add (parallel) tool calls to chat history + if !tool_calls.is_empty() { + chat_history.push(Message::Assistant { + id: None, + content: OneOrMany::many(tool_calls).expect("Impossible EmptyListError"), + }); + } + + // Add tool results to chat history + for (id, call_id, tool_result) in tool_results { + if let Some(call_id) = call_id { + chat_history.push(Message::User { + content: OneOrMany::one(UserContent::tool_result_with_call_id( + id, + call_id, + OneOrMany::one(ToolResultContent::text(tool_result)), + )), + }); + } else { + chat_history.push(Message::User { + content: OneOrMany::one(UserContent::tool_result( + id, + OneOrMany::one(ToolResultContent::text(tool_result)), + )), + }); + + } + + } + + // Set the current prompt to the last message in the chat history + current_prompt = match chat_history.pop() { + Some(prompt) => prompt, + None => unreachable!("Chat history should never be empty at this point"), + }; + + if !did_call_tool { + break; + } + } + + })) as _ +} \ No newline at end of file diff --git a/src/agents/optimizer.rs b/src/agents/optimizer.rs index 37323b3..9de7707 100644 --- a/src/agents/optimizer.rs +++ b/src/agents/optimizer.rs @@ -1,10 +1,12 @@ -use crate::error::{Result, ScribeError, map_provider_error}; +use crate::error::{Result, ScribeError}; use crate::types::{Artifact, Intent, MODEL}; use crate::tools::{deconstructor::Deconstructor, prompt_reviewer::PromptReviewer, web_searcher::WebSearcher}; use crate::utilities::require_env; use rig::providers::gemini::Client; -use rig::prelude::*; -use rig::completion::Prompt; +use futures::StreamExt; +use rig::client::CompletionClient; +use crate::agents::multi_turn_prompt; +use rig::tool::Tool; pub async fn optimizer(prompt: Intent) -> Result { require_env("GEMINI_API_KEY")?; @@ -14,6 +16,16 @@ pub async fn optimizer(prompt: Intent) -> Result { .map_err(|e| ScribeError::Validation(format!("Failed to parse embedded optimizer.json: {}", e)))?; let system_prompt = artifact.system_prompt; + // Log tool definitions for verbose output + let deconstructor_def = Deconstructor.definition("".to_string()).await; + tracing::info!("Tool Definition - Deconstructor: {:?}", deconstructor_def); + + let prompt_reviewer_def = PromptReviewer.definition("".to_string()).await; + tracing::info!("Tool Definition - PromptReviewer: {:?}", prompt_reviewer_def); + + let web_searcher_def = WebSearcher.definition("".to_string()).await; + tracing::info!("Tool Definition - WebSearcher: {:?}", web_searcher_def); + let prompt_officer = client .agent(MODEL) .preamble(system_prompt.as_str()) @@ -25,17 +37,32 @@ pub async fn optimizer(prompt: Intent) -> Result { let input = format!( "Follow this workflow to optimize the prompt: 1. Use the Deconstructor tool to analyze the goal and constraints of: '{}' - 2. Use the PromptReviewer to check and refine the draft. - 3. Use the WebSearcher to find the best practice related task/goal. - 4. Finally, provide the optimized system prompt. + 2. Use the PromptReviewer to check, you must research (using WebSearcher), and refine the draft. + 3. Finally, provide the optimized system prompt. Constraint: The final output must be the system prompt only, but you MUST use your tools first to arrive at that result.", prompt.text ); - let optimized_prompt = prompt_officer - .prompt(input) - .await - .map_err(map_provider_error)?; + let mut stream =multi_turn_prompt( prompt_officer,input,Vec::new()).await; + + tracing::info!("Starting optimization streaming..."); + let mut optimized_prompt = String::new(); + while let Some(res) = stream.next().await { + match res { + Ok(text) => { + print!("{}", text.text); + use std::io::Write; + let _ = std::io::stdout().flush(); + optimized_prompt.push_str(&text.text); + } + Err(e) => { + tracing::error!("Streaming error: {}", e); + return Err(ScribeError::ProtocolViolation(e.to_string())); + } + } + } + println!(); + tracing::info!("Optimization complete. Final artifact length: {}", optimized_prompt.len()); let artifact = Artifact { system_prompt: optimized_prompt, signed_by: "".to_string(), diff --git a/src/error.rs b/src/error.rs index 4e81e7b..add9b87 100644 --- a/src/error.rs +++ b/src/error.rs @@ -29,11 +29,4 @@ pub enum ScribeError { ClientError(#[from] rig::http_client::Error), } -pub fn map_provider_error(e: rig::completion::PromptError) -> ScribeError { - let msg = e.to_string(); - if msg.contains("though_signature") || msg.contains("INVALID_ARGUMENT") { - ScribeError::ProtocolViolation(msg) - } else { - ScribeError::Provider(e) - } -} + diff --git a/src/lib.rs b/src/lib.rs index 5c113b3..d9454dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ mod error; pub mod agents; pub mod tools; mod types; +pub mod logging; pub mod utilities; use std::path::PathBuf; @@ -16,6 +17,8 @@ use crate::utilities::{read_artifact, save_artifacts}; pub struct RigScribe { cache_dir: PathBuf, } +use tracing::info; + impl RigScribe { pub fn new(cache_dir: impl Into) -> Self { Self { @@ -38,14 +41,14 @@ impl RigScribe { let path = self.cache_dir.join(file_name); if let Ok(cached_artifact) = read_artifact(&path).await { - eprintln!("Cache HIT: {:?} loaded from disk", path); + info!("Cache HIT: {:?} loaded from disk", path); return Ok(cached_artifact); } - eprintln!("Cache MIS: {:?}", path); - eprintln!("Optimizing ..."); + info!("Cache MIS: {:?}", path); + info!("Optimizing ..."); let fresh_artifact = Self::optimize_agentic(request.into()).await?; save_artifacts(&path, &fresh_artifact).await?; - eprintln!("Optimize prompte cached to: {:?}", path); + info!("Optimize prompt cached to: {:?}", path); Ok(fresh_artifact) } } diff --git a/src/logging.rs b/src/logging.rs new file mode 100644 index 0000000..0d8f182 --- /dev/null +++ b/src/logging.rs @@ -0,0 +1,35 @@ +use tracing_appender::non_blocking::WorkerGuard; +use tracing_subscriber::{fmt, prelude::*, EnvFilter}; + +pub fn init_logging() -> WorkerGuard { + let file_appender = tracing_appender::rolling::daily("logs", "rigscribe.log"); + let (non_blocking, guard) = tracing_appender::non_blocking(file_appender); + + let console_layer = fmt::layer() + .with_target(true) // Include context (target) + .with_thread_ids(false) + .with_level(true) + .with_file(true) + .with_line_number(true) + .compact(); // Use a more compact format for console if desired, or pretty() + + let file_layer = fmt::layer() + .with_writer(non_blocking) + .with_target(true) + .with_thread_ids(true) + .with_level(true) + .with_file(true) + .with_line_number(true) + .with_ansi(false); // Disable colors for file + + let filter = EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::new("info,rigscribe=debug")); + + tracing_subscriber::registry() + .with(filter) + .with(console_layer) + .with(file_layer) + .init(); + + guard +} diff --git a/src/main.rs b/src/main.rs index 7109005..feaf928 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,21 @@ use std::path::PathBuf; -use rigscribe::{Result, RigScribe, ScopeId}; +use rigscribe::{Result, RigScribe, ScopeId, logging}; use termimad::MadSkin; +use tracing::info; + /// Entry point: Orchestrates the transformation of user intent into a system prompt. #[tokio::main] async fn main() -> Result<()> { + let _guard = logging::init_logging(); + // create caching path let cache_path = PathBuf::from("./.prompts_perssitense_cache"); let scribe = RigScribe::new(cache_path); - let id = ScopeId(2032); + let id = ScopeId(2011); // Input: The raw, often vague user intent. let raw_prompt = "write a python fonction"; - eprintln!("\n\nOptimizing ...\n\n"); + info!("Starting prompt optimization process for: '{}'", raw_prompt); // Execute the multi-agent optimization pipeline. let optimized_prompt = scribe.optimize_with_cache(raw_prompt, id).await?; diff --git a/src/tools/deconstructor.rs b/src/tools/deconstructor.rs index d65e5b2..88fcc56 100644 --- a/src/tools/deconstructor.rs +++ b/src/tools/deconstructor.rs @@ -5,7 +5,6 @@ use rig::completion::ToolDefinition; use rig::tool::Tool; use rig::providers::gemini::Client; use serde::{Deserialize, Serialize}; -use rig::completion::Prompt; use rig::client::ProviderClient; use rig::prelude::*; // Needed for .prompt() method @@ -30,7 +29,7 @@ impl Tool for Deconstructor { } async fn call(&self, args: Self::Args) -> Result { - println!("[Tool Calling]-> Deconstructor!"); + tracing::info!("[Tool Calling]-> Deconstructor with args: {:?}", args); require_env("GEMINI_API_KEY")?; let client = Client::from_env(); let architect = client @@ -43,10 +42,26 @@ impl Tool for Deconstructor { ", ) .build(); - let repons = architect.prompt(args.text.clone()).await?; + + let mut stream = crate::agents::multi_turn_prompt(architect, args.text.clone(), Vec::new()).await; + let mut full_response = String::new(); + while let Some(res) = futures::StreamExt::next(&mut stream).await { + match res { + Ok(text) => { + print!("{}", text.text); + use std::io::Write; + let _ = std::io::stdout().flush(); + full_response.push_str(&text.text); + } + Err(e) => return Err(ScribeError::ProtocolViolation(e.to_string())), + } + } + println!(); + let spec_extractor = client.extractor::(MODEL).build(); - let spec = spec_extractor.extract(repons).await; + let spec = spec_extractor.extract(full_response).await?; - Ok(spec?) + tracing::debug!("Deconstructor extracted spec: {:?}", spec); + Ok(spec) } } diff --git a/src/tools/prompt_reviewer.rs b/src/tools/prompt_reviewer.rs index dc30840..e1ba007 100644 --- a/src/tools/prompt_reviewer.rs +++ b/src/tools/prompt_reviewer.rs @@ -4,9 +4,9 @@ use crate::utilities::require_env; use rig::completion::ToolDefinition; use rig::tool::Tool; use rig::providers::gemini::Client; +use crate::tools::web_searcher::WebSearcher; use serde::{Deserialize, Serialize}; use schemars::JsonSchema; -use rig::completion::Prompt; use rig::client::ProviderClient; use rig::prelude::*; @@ -37,24 +37,46 @@ impl Tool for PromptReviewer { } async fn call(&self, args: Self::Args) -> Result { - println!("[Tool Calling]-> PromptReviewer!"); + tracing::info!("[Tool Calling]-> PromptReviewer with args: {:?}", args); require_env("GEMINI_API_KEY")?; let client = Client::from_env(); let system_prompt_json = include_str!("../../data/prompt_officer.json"); let artifact: Artifact = serde_json::from_str(system_prompt_json) .map_err(|e| ScribeError::Validation(format!("Failed to parse embedded prompt_officer.json: {}", e)))?; let system_prompt = artifact.system_prompt; - let prompt_reviewer = client.agent(MODEL).preamble(system_prompt.as_str()).build(); + let prompt_reviewer = client.agent(MODEL) + .preamble(system_prompt.as_str()) + .tool(WebSearcher) + .build(); let input = format!(" Critisize following prompt base on given property: Goal:\n{}\n\nConstraints:\n{}\n\nDraft:\n{}\n\n\ - Instruction: Be highly cretical and persimiste and find every defit or any point which could be better. and use all best practice and if needed use websearch. \n", + Instruction: Be highly critical and pessimistic. + 1. You MUST first use the 'WebSearcher' tool to research state-of-the-art prompt engineering techniques and best practices specifically for this type of task. + 2. Use the search results to find every deficit in the draft. + 3. Rewrite the prompt to be flawless. + \n", args.spec.goal, args.spec.constraints, args.intent.text); - let repons = prompt_reviewer.prompt(input).await?; + let mut stream = crate::agents::multi_turn_prompt(prompt_reviewer, input, Vec::new()).await; + let mut full_response = String::new(); + while let Some(res) = futures::StreamExt::next(&mut stream).await { + match res { + Ok(text) => { + print!("{}", text.text); + use std::io::Write; + let _ = std::io::stdout().flush(); + full_response.push_str(&text.text); + } + Err(e) => return Err(ScribeError::ProtocolViolation(e.to_string())), + } + } + println!(); + let artifact_extractor = client.extractor::(MODEL).build(); - let artifact = artifact_extractor.extract(repons).await; + let artifact = artifact_extractor.extract(full_response).await?; - Ok(artifact?) + tracing::debug!("PromptReviewer produced artifact: {:?}", artifact); + Ok(artifact) } } diff --git a/src/tools/web_searcher.rs b/src/tools/web_searcher.rs index a63230f..d26b86b 100644 --- a/src/tools/web_searcher.rs +++ b/src/tools/web_searcher.rs @@ -19,12 +19,12 @@ impl Tool for WebSearcher { let parameters = serde_json::to_value(schema).unwrap(); ToolDefinition { name: "WebSearcher".to_string(), - description: "this tools query will search on web and retern result in one string".to_string(), + description: "A research tool. Use this to find best practices, domain-specific knowledge, or to verify assumptions about the user's goal.".to_string(), parameters, } } async fn call(&self, args: Self::Args) -> Result { - println!("[Tool Calling]-> WebSearcher"); + tracing::info!("[Tool Calling]-> WebSearcher with args: {:?}", args); let api_key = std::env::var("SERPER_API_KEY").map_err( |e| ScribeError::Config(format!("SERPER_API_KEY not set: {}", e))