"""The one-and-only AI primitive used by every demo in this folder. Every script in scripts/ imports `ask` from this module. The whole point of the example is that *every* "smart function" you'll see is the same call — just a different prompt going in, and a string coming back out. The model doesn't know whether it's doing arithmetic, sentiment analysis, or reading text out of a photo. It is, in every case, predicting what comes next. The model runs locally inside the `ollama` container started by the docker-compose file alongside this script. We hit its HTTP API; nothing about your input ever leaves the machine. """ from __future__ import annotations import base64 import os from pathlib import Path import httpx OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434") OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen3.5:9b") # Reasonable timeout for a CPU-only first run; vision calls can be slow. _TIMEOUT = httpx.Timeout(connect=10.0, read=300.0, write=60.0, pool=10.0) def ask(prompt: str, image: str | Path | None = None, *, temperature: float = 0.0) -> str: """Ask the local model to continue some text. Args: prompt: The text the model sees. Whatever you write here is the "body" of your AI function — the same way regular Python functions have a body of code, an AI function has a body of prompt. image: Optional path to a local image. When provided, the model sees the image alongside the prompt (this needs a vision-capable model — `qwen2.5vl` is the default). temperature: 0 makes outputs roughly deterministic, which is what we want for demos. Crank it up for more variety, down for fewer surprises. Returns: The model's continuation as a plain string, with surrounding whitespace stripped. """ payload: dict = { "model": OLLAMA_MODEL, "prompt": prompt, "stream": False, "options": {"temperature": temperature}, # Some Qwen builds support a "thinking" mode where the model writes # out an internal monologue before answering. For this workshop we # want clean, direct completions, so we ask for that explicitly. "think": False, } if image is not None: image_path = Path(image) if not image_path.exists(): raise FileNotFoundError(f"Image not found: {image_path}") payload["images"] = [base64.b64encode(image_path.read_bytes()).decode("ascii")] with httpx.Client(timeout=_TIMEOUT) as client: try: r = client.post(f"{OLLAMA_URL}/api/generate", json=payload) except httpx.ConnectError as e: raise RuntimeError( f"Could not reach Ollama at {OLLAMA_URL}. " "Is the docker-compose stack running? Try `docker compose up -d`." ) from e r.raise_for_status() return r.json()["response"].strip() if __name__ == "__main__": # Sanity check — confirms the model is reachable and answering. print(f"Asking {OLLAMA_MODEL} at {OLLAMA_URL} to say hello...") print(ask("Say hello in five words or fewer."))