example-projects/examples/everything_function/scripts/ai_function.py

79 lines
3.1 KiB
Python

"""The one-and-only AI primitive used by every demo in this folder.
Every script in scripts/ imports `ask` from this module. The whole point of
the example is that *every* "smart function" you'll see is the same call —
just a different prompt going in, and a string coming back out. The model
doesn't know whether it's doing arithmetic, sentiment analysis, or reading
text out of a photo. It is, in every case, predicting what comes next.
The model runs locally inside the `ollama` container started by the
docker-compose file alongside this script. We hit its HTTP API; nothing
about your input ever leaves the machine.
"""
from __future__ import annotations
import base64
import os
from pathlib import Path
import httpx
# Base URL of the Ollama server and the model to query. Each falls back to the
# default below when the environment variable of the same name is not set.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")

# Generous read timeout so a CPU-only first run (and slow vision calls) can
# finish; connecting, writing and pool acquisition keep much shorter limits.
_TIMEOUT = httpx.Timeout(
    connect=10.0,
    read=300.0,
    write=60.0,
    pool=10.0,
)
def ask(prompt: str, image: str | Path | None = None, *, temperature: float = 0.0) -> str:
"""Ask the local model to continue some text.
Args:
prompt: The text the model sees. Whatever you write here is the
"body" of your AI function — the same way regular Python functions
have a body of code, an AI function has a body of prompt.
image: Optional path to a local image. When provided, the model
sees the image alongside the prompt (this needs a vision-capable
model — `qwen2.5vl` is the default).
temperature: 0 makes outputs roughly deterministic, which is what we
want for demos. Crank it up for more variety, down for fewer surprises.
Returns:
The model's continuation as a plain string, with surrounding whitespace
stripped.
"""
payload: dict = {
"model": OLLAMA_MODEL,
"prompt": prompt,
"stream": False,
"options": {"temperature": temperature},
# Some Qwen builds support a "thinking" mode where the model writes
# out an internal monologue before answering. For this workshop we
# want clean, direct completions, so we ask for that explicitly.
"think": False,
}
if image is not None:
image_path = Path(image)
if not image_path.exists():
raise FileNotFoundError(f"Image not found: {image_path}")
payload["images"] = [base64.b64encode(image_path.read_bytes()).decode("ascii")]
with httpx.Client(timeout=_TIMEOUT) as client:
try:
r = client.post(f"{OLLAMA_URL}/api/generate", json=payload)
except httpx.ConnectError as e:
raise RuntimeError(
f"Could not reach Ollama at {OLLAMA_URL}. "
"Is the docker-compose stack running? Try `docker compose up -d`."
) from e
r.raise_for_status()
return r.json()["response"].strip()
if __name__ == "__main__":
    # Smoke test: announce which model/server is being queried, then print
    # the reply to a trivial prompt to confirm the model is reachable.
    banner = f"Asking {OLLAMA_MODEL} at {OLLAMA_URL} to say hello..."
    print(banner)
    greeting = ask("Say hello in five words or fewer.")
    print(greeting)