79 lines
3.1 KiB
Python
79 lines
3.1 KiB
Python
|
|
"""The one-and-only AI primitive used by every demo in this folder.

Every script in scripts/ imports `ask` from this module. The whole point of
the example is that *every* "smart function" you'll see is the same call —
just a different prompt going in, and a string coming back out. The model
doesn't know whether it's doing arithmetic, sentiment analysis, or reading
text out of a photo. It is, in every case, predicting what comes next.

The model runs locally inside the `ollama` container started by the
docker-compose file alongside this script. We hit its HTTP API; nothing
about your input ever leaves the machine.
"""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import base64
|
||
|
|
import os
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
import httpx
|
||
|
|
|
||
|
|
# Base URL of the Ollama HTTP API. Overridable through the environment so the
# same code works on the host and inside the compose network. Trailing slashes
# are stripped because the endpoint path ("/api/generate") is appended to this
# value verbatim; "http://host:11434/" would otherwise become
# "http://host:11434//api/generate".
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434").rstrip("/")

# Name (and tag) of the model every request is sent to; also overridable
# through the environment.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen3.5:9b")
|
||
|
|
|
||
|
|
# Per-phase HTTP timeouts, in seconds. The read budget is deliberately large:
# a first run on a CPU-only machine, and vision calls in particular, can take
# a long time to produce a response.
_TIMEOUT = httpx.Timeout(
    read=300.0,
    write=60.0,
    connect=10.0,
    pool=10.0,
)
|
||
|
|
|
||
|
|
|
||
|
|
def ask(prompt: str, image: str | Path | None = None, *, temperature: float = 0.0) -> str:
|
||
|
|
"""Ask the local model to continue some text.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
prompt: The text the model sees. Whatever you write here is the
|
||
|
|
"body" of your AI function — the same way regular Python functions
|
||
|
|
have a body of code, an AI function has a body of prompt.
|
||
|
|
image: Optional path to a local image. When provided, the model
|
||
|
|
sees the image alongside the prompt (this needs a vision-capable
|
||
|
|
model — `qwen2.5vl` is the default).
|
||
|
|
temperature: 0 makes outputs roughly deterministic, which is what we
|
||
|
|
want for demos. Crank it up for more variety, down for fewer surprises.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
The model's continuation as a plain string, with surrounding whitespace
|
||
|
|
stripped.
|
||
|
|
"""
|
||
|
|
payload: dict = {
|
||
|
|
"model": OLLAMA_MODEL,
|
||
|
|
"prompt": prompt,
|
||
|
|
"stream": False,
|
||
|
|
"options": {"temperature": temperature},
|
||
|
|
# Some Qwen builds support a "thinking" mode where the model writes
|
||
|
|
# out an internal monologue before answering. For this workshop we
|
||
|
|
# want clean, direct completions, so we ask for that explicitly.
|
||
|
|
"think": False,
|
||
|
|
}
|
||
|
|
|
||
|
|
if image is not None:
|
||
|
|
image_path = Path(image)
|
||
|
|
if not image_path.exists():
|
||
|
|
raise FileNotFoundError(f"Image not found: {image_path}")
|
||
|
|
payload["images"] = [base64.b64encode(image_path.read_bytes()).decode("ascii")]
|
||
|
|
|
||
|
|
with httpx.Client(timeout=_TIMEOUT) as client:
|
||
|
|
try:
|
||
|
|
r = client.post(f"{OLLAMA_URL}/api/generate", json=payload)
|
||
|
|
except httpx.ConnectError as e:
|
||
|
|
raise RuntimeError(
|
||
|
|
f"Could not reach Ollama at {OLLAMA_URL}. "
|
||
|
|
"Is the docker-compose stack running? Try `docker compose up -d`."
|
||
|
|
) from e
|
||
|
|
r.raise_for_status()
|
||
|
|
return r.json()["response"].strip()
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Smoke test when run as a script: announce which model and server are
    # being used, then print the reply to a trivial prompt.
    banner = f"Asking {OLLAMA_MODEL} at {OLLAMA_URL} to say hello..."
    print(banner)
    greeting = ask("Say hello in five words or fewer.")
    print(greeting)
|