# example-projects/examples/everything_function/scripts/ai_function.py

"""The one-and-only AI primitive used by every demo in this folder.

Every script in scripts/ imports `ask` from this module. The whole point of
the example is that *every* "smart function" you'll see is the same call —
just a different prompt going in, and a string coming back out. The model
doesn't know whether it's doing arithmetic, sentiment analysis, or reading
text out of a photo. It is, in every case, predicting what comes next.

The model runs locally inside the `ollama` container started by the
docker-compose file alongside this script. We hit its HTTP API; nothing
about your input ever leaves the machine.
"""

from __future__ import annotations

import base64
import os
from pathlib import Path

import httpx

# Connection settings. Each one is read from the environment variable of the
# same name and falls back to the literal shown here when it is unset.
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen3.5:9b")

# Per-phase HTTP timeouts in seconds. The read budget is deliberately large:
# a CPU-only first run is slow, and vision calls can be slower still.
_TIMEOUT = httpx.Timeout(
    connect=10.0,
    read=300.0,
    write=60.0,
    pool=10.0,
)


def ask(prompt: str, image: str | Path | None = None, *, temperature: float = 0.0) -> str:
    """Ask the local model to continue some text.

    Args:
        prompt: The text the model sees. Whatever you write here is the
            "body" of your AI function — the same way regular Python functions
            have a body of code, an AI function has a body of prompt.
        image: Optional path to a local image. When provided, the file is
            base64-encoded and sent alongside the prompt. This only works
            when the model named by `OLLAMA_MODEL` is vision-capable.
        temperature: Sampling temperature forwarded to the model. 0 makes
            outputs roughly deterministic, which is what we want for demos;
            raise it for more variety.

    Returns:
        The model's continuation as a plain string, with surrounding whitespace
        stripped.

    Raises:
        FileNotFoundError: If `image` is given but nothing exists at that path.
        RuntimeError: If the Ollama server cannot be reached, either because
            the connection failed or because the connection attempt timed out.
        httpx.HTTPStatusError: If Ollama answers with a 4xx/5xx status.
    """
    # Check and encode the image up front so a bad path fails fast, before
    # any request state is assembled.
    encoded_images: list[str] | None = None
    if image is not None:
        image_path = Path(image)
        if not image_path.exists():
            raise FileNotFoundError(f"Image not found: {image_path}")
        encoded_images = [base64.b64encode(image_path.read_bytes()).decode("ascii")]

    payload: dict = {
        "model": OLLAMA_MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": temperature},
        # Some Qwen builds support a "thinking" mode where the model writes
        # out an internal monologue before answering. For this workshop we
        # want clean, direct completions, so we ask for that explicitly.
        "think": False,
    }
    if encoded_images is not None:
        payload["images"] = encoded_images

    with httpx.Client(timeout=_TIMEOUT) as client:
        try:
            r = client.post(f"{OLLAMA_URL}/api/generate", json=payload)
        except (httpx.ConnectError, httpx.ConnectTimeout) as e:
            # ConnectTimeout is not a subclass of ConnectError, so it has to
            # be listed separately for an unreachable host to get the same
            # friendly hint as a refused connection.
            raise RuntimeError(
                f"Could not reach Ollama at {OLLAMA_URL}. "
                "Is the docker-compose stack running? Try `docker compose up -d`."
            ) from e
        r.raise_for_status()
        return r.json()["response"].strip()


if __name__ == "__main__":
    # Smoke test when run as a script: announce the target, send one tiny
    # prompt, and print whatever the model sends back.
    banner = f"Asking {OLLAMA_MODEL} at {OLLAMA_URL} to say hello..."
    print(banner)
    reply = ask("Say hello in five words or fewer.")
    print(reply)
Syncing from Shen's latest main on github 2026-05-28 17:16:02 +00:00			`"""The one-and-only AI primitive used by every demo in this folder.`

			Every script in scripts/ imports `ask` from this module. The whole point of
			`the example is that every "smart function" you'll see is the same call —`
			`just a different prompt going in, and a string coming back out. The model`
			`doesn't know whether it's doing arithmetic, sentiment analysis, or reading`
			`text out of a photo. It is, in every case, predicting what comes next.`

			The model runs locally inside the `ollama` container started by the
			`docker-compose file alongside this script. We hit its HTTP API; nothing`
			`about your input ever leaves the machine.`
			`"""`

			`from __future__ import annotations`

			`import base64`
			`import os`
			`from pathlib import Path`

			`import httpx`

			`OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")`
			`OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "qwen3.5:9b")`

			`# Reasonable timeout for a CPU-only first run; vision calls can be slow.`
			`_TIMEOUT = httpx.Timeout(connect=10.0, read=300.0, write=60.0, pool=10.0)`


			`def ask(prompt: str, image: str \| Path \| None = None, *, temperature: float = 0.0) -> str:`
			`"""Ask the local model to continue some text.`

			`Args:`
			`prompt: The text the model sees. Whatever you write here is the`
			`"body" of your AI function — the same way regular Python functions`
			`have a body of code, an AI function has a body of prompt.`
			`image: Optional path to a local image. When provided, the model`
			`sees the image alongside the prompt (this needs a vision-capable`
			model — `qwen2.5vl` is the default).
			`temperature: 0 makes outputs roughly deterministic, which is what we`
			`want for demos. Crank it up for more variety, down for fewer surprises.`

			`Returns:`
			`The model's continuation as a plain string, with surrounding whitespace`
			`stripped.`
			`"""`
			`payload: dict = {`
			`"model": OLLAMA_MODEL,`
			`"prompt": prompt,`
			`"stream": False,`
			`"options": {"temperature": temperature},`
			`# Some Qwen builds support a "thinking" mode where the model writes`
			`# out an internal monologue before answering. For this workshop we`
			`# want clean, direct completions, so we ask for that explicitly.`
			`"think": False,`
			`}`

			`if image is not None:`
			`image_path = Path(image)`
			`if not image_path.exists():`
			`raise FileNotFoundError(f"Image not found: {image_path}")`
			`payload["images"] = [base64.b64encode(image_path.read_bytes()).decode("ascii")]`

			`with httpx.Client(timeout=_TIMEOUT) as client:`
			`try:`
			`r = client.post(f"{OLLAMA_URL}/api/generate", json=payload)`
			`except httpx.ConnectError as e:`
			`raise RuntimeError(`
			`f"Could not reach Ollama at {OLLAMA_URL}. "`
			"Is the docker-compose stack running? Try `docker compose up -d`."
			`) from e`
			`r.raise_for_status()`
			`return r.json()["response"].strip()`


			`if __name__ == "__main__":`
			`# Sanity check — confirms the model is reachable and answering.`
			`print(f"Asking {OLLAMA_MODEL} at {OLLAMA_URL} to say hello...")`
			`print(ask("Say hello in five words or fewer."))`