Compare commits
4 Commits
feature/lo
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 200500ecd1 | |||
| d8d2c43ade | |||
| 949dfc309b | |||
| 97555d44af |
355
bender/bot.py
355
bender/bot.py
@@ -8,6 +8,7 @@ import threading
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
from duckduckgo_search import DDGS
|
||||
from md2mrkdwn import convert as md_to_slack
|
||||
from slack_bolt import App
|
||||
from slack_bolt.adapter.socket_mode import SocketModeHandler
|
||||
@@ -37,6 +38,332 @@ logger.info("Loaded system prompt from %s (%d chars)", SYSTEM_PROMPT_PATH, len(S
|
||||
# ---------------------------------------------------------------------------
|
||||
# Timeout, in seconds, passed to the HTTP request that calls the DeepSeek API.
DEEPSEEK_TIMEOUT = 120  # seconds
# Length threshold, in characters, that the final reply text is compared against.
MAX_INLINE_LENGTH = 2800  # characters
# Upper bound on loop iterations in chat_with_tools before it makes one last
# call without tool definitions.
MAX_TOOL_TURNS = 5  # max tool-call back-and-forths with DeepSeek
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool definitions (sent to DeepSeek API for function calling)
|
||||
# ---------------------------------------------------------------------------
|
||||
# Function-calling schema passed as the "tools" field of each DeepSeek request.
# Each entry's "name" must have a matching key in TOOL_EXECUTORS, and the
# "properties" keys are the keyword arguments the executor is called with.
DEEPSEEK_TOOLS = [
    # web_search: takes a single required "query" string.
    {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": (
                "Search the web for current, up-to-date information. "
                "Returns a list of results with titles, URLs, and snippets. "
                "Use this as a first step to find relevant pages."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query string. Be specific and include relevant keywords.",
                    }
                },
                "required": ["query"],
                "additionalProperties": False,
            },
        },
    },
    # web_fetch: takes a single required "url" string.
    {
        "type": "function",
        "function": {
            "name": "web_fetch",
            "description": (
                "Fetch and read the full content of a web page. "
                "Use this after web_search when you need details beyond the snippet — "
                "for example, to read a full article, get specific numbers, "
                "or understand context that the search snippet didn't cover."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "The full URL of the page to fetch.",
                    }
                },
                "required": ["url"],
                "additionalProperties": False,
            },
        },
    },
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool implementations
|
||||
# ---------------------------------------------------------------------------
|
||||
def _format_search_results(results: list[dict]) -> str:
|
||||
if not results:
|
||||
return "No search results found."
|
||||
|
||||
lines = []
|
||||
for i, r in enumerate(results[:5], 1):
|
||||
title = r.get("title", "No title")
|
||||
href = r.get("href", "")
|
||||
body = r.get("body", "No description")
|
||||
lines.append(f"{i}. {title}")
|
||||
if href:
|
||||
lines.append(f" URL: {href}")
|
||||
lines.append(f" {body}")
|
||||
lines.append("")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def execute_web_search(query: str) -> str:
    """Run a text search for *query* via ``DDGS`` and return formatted results.

    At most five results are requested. Any exception is logged with its
    traceback and reported back as a ``"Web search error: ..."`` string
    rather than propagated to the caller.
    """
    try:
        search_client = DDGS()
        hits = list(search_client.text(query, max_results=5))
        return _format_search_results(hits)
    except Exception as err:
        logger.exception("Web search failed for query=%r", query)
        return f"Web search error: {err}"
|
||||
|
||||
|
||||
def execute_web_fetch(url: str) -> str:
    """Download *url* and return its text content, truncated for the model.

    HTML responses are parsed with ``lxml`` (imported lazily); script, style,
    nav, footer and header elements are dropped before the text is extracted.
    If that parsing fails for any reason, the raw response text is used.
    Non-HTML responses are used as-is. Blank lines are removed, every line is
    stripped, and the result is cut at 5000 characters.

    Returns:
        ``"Content from <url>: ..."`` on success, or an
        ``"Error fetching <url>: ..."`` string on timeout, HTTP error status,
        or any other exception (the last case is also logged).
    """
    timeout_seconds = 15
    char_limit = 5000

    try:
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0 (compatible; SlackBot/1.0)"},
            timeout=timeout_seconds,
            allow_redirects=True,
        )
        resp.raise_for_status()

        is_html = "text/html" in resp.headers.get("content-type", "")
        if is_html:
            try:
                from lxml import html

                tree = html.fromstring(resp.content)
                # Remove non-content elements before extracting text.
                for node in tree.xpath("//script | //style | //nav | //footer | //header"):
                    node.drop_tree()
                raw = tree.text_content()
            except Exception:
                # Best effort: fall back to the undecorated response text.
                raw = resp.text
        else:
            raw = resp.text

        # Strip each line and drop the ones that end up empty.
        text = "\n".join(filter(None, map(str.strip, raw.splitlines())))

        if len(text) > char_limit:
            text = text[:char_limit] + "\n\n[truncated]"

        return f"Content from {url}:\n\n{text}"

    except requests.Timeout:
        return f"Error fetching {url}: request timed out"
    except requests.HTTPError as http_err:
        return f"Error fetching {url}: HTTP {http_err.response.status_code}"
    except Exception as err:
        logger.exception("Web fetch failed for url=%r", url)
        return f"Error fetching {url}: {err}"
|
||||
|
||||
|
||||
# Dispatch table from tool name (as declared in DEEPSEEK_TOOLS) to the function
# implementing it. chat_with_tools looks executors up here by the name found in
# each tool call and invokes them with the call's arguments as keyword args.
TOOL_EXECUTORS = {
    "web_search": execute_web_search,
    "web_fetch": execute_web_fetch,
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DSML fallback parser — DeepSeek V4 sometimes leaks raw DSML markup in the
|
||||
# content field instead of returning structured tool_calls.
|
||||
# ---------------------------------------------------------------------------
|
||||
_DSML_INVOKE_RE = re.compile(
|
||||
r'<||DSML||invoke\s+name="([^"]+)">'
|
||||
r'(.*?)'
|
||||
r'<||DSML||invoke>',
|
||||
re.DOTALL,
|
||||
)
|
||||
_DSML_PARAM_RE = re.compile(
|
||||
r'<||DSML||parameter\s+name="([^"]+)"[^>]*>'
|
||||
r'(.*?)'
|
||||
r'||DSML||parameter>',
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def parse_dsml_tool_calls(content: str) -> list[dict] | None:
|
||||
if "DSML" not in content:
|
||||
return None
|
||||
|
||||
invocations = _DSML_INVOKE_RE.findall(content)
|
||||
if not invocations:
|
||||
return None
|
||||
|
||||
tool_calls = []
|
||||
for i, (fn_name, param_block) in enumerate(invocations):
|
||||
params = {}
|
||||
for pname, pvalue in _DSML_PARAM_RE.findall(param_block):
|
||||
params[pname] = pvalue.strip()
|
||||
tool_calls.append({
|
||||
"id": f"dsml_fallback_{i}",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": fn_name,
|
||||
"arguments": json.dumps(params),
|
||||
},
|
||||
})
|
||||
|
||||
return tool_calls if tool_calls else None
|
||||
|
||||
|
||||
_DSML_BLOCK_RE = re.compile(
|
||||
r'<||DSML||tool_calls>.*?<||DSML||tool_calls>',
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def _strip_dsml(text: str) -> str:
|
||||
if not text or "DSML" not in text:
|
||||
return text
|
||||
return _DSML_BLOCK_RE.sub("", text).strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DeepSeek API client
|
||||
# ---------------------------------------------------------------------------
|
||||
def call_deepseek(
    api_url: str,
    api_key: str,
    model: str,
    messages: list[dict],
    tools: list[dict] | None = None,
) -> dict:
    """POST a chat request to *api_url* and return the decoded JSON response.

    The request body carries ``model`` and ``messages``; ``tools`` is added
    only when it is truthy. The request uses bearer authentication with
    *api_key* and the module-wide ``DEEPSEEK_TIMEOUT``.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` when the response has
            an error status. Other ``requests`` exceptions propagate as-is.
    """
    request_body: dict = {"model": model, "messages": messages}
    if tools:
        request_body["tools"] = tools

    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    reply = requests.post(
        api_url,
        headers=auth_headers,
        json=request_body,
        timeout=DEEPSEEK_TIMEOUT,
    )
    reply.raise_for_status()
    return reply.json()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool calling loop
|
||||
# ---------------------------------------------------------------------------
|
||||
def _execute_tool_call(tc: dict, ws_logger: logging.LoggerAdapter) -> str:
    """Run a single tool call and return the text to send back as its result.

    Argument-parsing problems, unknown tool names and exceptions raised by the
    tool are all reported as an error string instead of being raised, so one
    bad call cannot abort the whole conversation.
    """
    fn_name = tc["function"]["name"]
    # A missing or empty arguments string is treated as "no arguments".
    raw_args = tc["function"].get("arguments") or "{}"
    try:
        fn_args = json.loads(raw_args)
    except json.JSONDecodeError as exc:
        ws_logger.warning(
            "Invalid JSON arguments for tool %s: %r", fn_name, raw_args
        )
        return f"Error: invalid arguments for tool '{fn_name}': {exc}"
    if not isinstance(fn_args, dict):
        ws_logger.warning(
            "Non-object arguments for tool %s: %r", fn_name, raw_args
        )
        return f"Error: arguments for tool '{fn_name}' must be a JSON object"

    ws_logger.info("Executing tool: %s(%s)", fn_name, json.dumps(fn_args))

    executor = TOOL_EXECUTORS.get(fn_name)
    if not executor:
        ws_logger.warning("Unknown tool requested: %s", fn_name)
        return f"Error: Unknown tool '{fn_name}'"

    try:
        return executor(**fn_args)
    except Exception as tool_exc:
        ws_logger.exception("Tool %s failed", fn_name)
        return f"Tool execution error: {tool_exc}"


def chat_with_tools(
    api_url: str,
    api_key: str,
    model: str,
    prompt_text: str,
    ws_logger: logging.LoggerAdapter,
) -> str:
    """Answer *prompt_text* with DeepSeek, executing requested tools in a loop.

    The conversation starts with ``SYSTEM_PROMPT`` and the user prompt. For up
    to ``MAX_TOOL_TURNS`` turns the model is called with ``DEEPSEEK_TOOLS``;
    when it requests tool calls (either as structured ``tool_calls`` or as
    DSML markup leaked into the content), each tool is executed and its result
    appended as a ``tool`` message before the next turn. Any other finish
    reason ends the loop and returns the assistant content with DSML blocks
    removed. If the turns run out, one final call is made without tool
    definitions, preceded by a user message asking for a summary.

    Returns:
        The reply text; always a string. A fixed apology is returned when the
        final call produces no content.

    Raises:
        Whatever ``call_deepseek`` raises; API errors are not handled here.
    """
    messages: list[dict] = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt_text},
    ]

    for turn in range(1, MAX_TOOL_TURNS + 1):
        data = call_deepseek(api_url, api_key, model, messages, DEEPSEEK_TOOLS)

        choice = data["choices"][0]
        finish_reason = choice["finish_reason"]
        assistant_msg = choice["message"]

        ws_logger.info(
            "DeepSeek turn %d/%d finish_reason=%s tokens=%s",
            turn,
            MAX_TOOL_TURNS,
            finish_reason,
            data.get("usage", {}),
        )

        # Determine tool_calls: either from the structured response, or by
        # parsing leaked DSML markup in the content field.
        tool_calls = assistant_msg.get("tool_calls")
        content = assistant_msg.get("content", "") or ""

        if not tool_calls and finish_reason == "stop":
            parsed = parse_dsml_tool_calls(content)
            if parsed:
                ws_logger.info(
                    "Detected %d DSML tool call(s) in content (fallback parser)",
                    len(parsed),
                )
                tool_calls = parsed
                content = None
                finish_reason = "tool_calls"

        # When tool_calls are present, drop content that carries leaked DSML.
        if tool_calls and content and "DSML" in content:
            ws_logger.info("Stripping leaked DSML from assistant content")
            content = None

        # Build the assistant message that is round-tripped back to the API.
        stored_msg: dict = {"role": "assistant"}
        if assistant_msg.get("reasoning_content") is not None:
            stored_msg["reasoning_content"] = assistant_msg["reasoning_content"]
        elif "reasoning_content" in assistant_msg:
            # Key present but null: keep the key, with an empty string.
            stored_msg["reasoning_content"] = ""
        if content:
            stored_msg["content"] = content
        if tool_calls:
            stored_msg["tool_calls"] = tool_calls

        messages.append(stored_msg)

        if finish_reason == "tool_calls" and tool_calls:
            ws_logger.info("DeepSeek requested %d tool call(s)", len(tool_calls))
            for tc in tool_calls:
                messages.append({
                    "role": "tool",
                    "tool_call_id": tc["id"],
                    "content": _execute_tool_call(tc, ws_logger),
                })
            # Next turn lets the model process the tool results.
            continue

        if finish_reason != "stop":
            ws_logger.warning("Unexpected finish_reason: %s", finish_reason)
        # content may have been set to None above; always hand back a string.
        return _strip_dsml(content or "")

    # Tool turns exhausted: make one final call without tool definitions.
    ws_logger.info("Tool turns exhausted; making final call without tools")
    messages.append({
        "role": "user",
        "content": (
            "Please provide your answer now based on the tool results above. "
            "Do not attempt any more tool calls. Summarize what you found."
        ),
    })
    data = call_deepseek(api_url, api_key, model, messages, tools=None)
    final = data["choices"][0]["message"].get("content", "") or ""
    final = _strip_dsml(final)
    return final or (
        "I wasn't able to complete the request within the allowed number of steps. "
        "Please try again or simplify your question."
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -47,7 +374,6 @@ def load_workspaces():
|
||||
if raw:
|
||||
workspaces = json.loads(raw)
|
||||
else:
|
||||
# Fallback: build a single workspace from the legacy env vars
|
||||
slack_bot_token = os.environ.get("SLACK_BOT_TOKEN_BENDER")
|
||||
slack_app_token = os.environ.get("SLACK_APP_TOKEN_BENDER")
|
||||
deepseek_api_key = os.environ.get("DEEPSEEK_API_KEY_BENDER")
|
||||
@@ -92,7 +418,7 @@ def make_app(ws):
|
||||
ws_name = ws["name"]
|
||||
deepseek_api_key = ws["deepseek_api_key"]
|
||||
deepseek_api_url = ws.get("deepseek_api_url", "https://api.deepseek.com/chat/completions")
|
||||
deepseek_model = ws.get("deepseek_model", "deepseek-chat")
|
||||
deepseek_model = ws.get("deepseek_model", "deepseek-v4-pro")
|
||||
|
||||
ws_logger = logging.LoggerAdapter(logger, {"workspace": ws_name})
|
||||
|
||||
@@ -128,32 +454,21 @@ def make_app(ws):
|
||||
ws_logger.info("DeepSeek API call starting model=%s", deepseek_model)
|
||||
start = time.time()
|
||||
|
||||
response = requests.post(
|
||||
reply_text = chat_with_tools(
|
||||
deepseek_api_url,
|
||||
headers={
|
||||
"Authorization": f"Bearer {deepseek_api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
json={
|
||||
"model": deepseek_model,
|
||||
"messages": [
|
||||
{"role": "system", "content": SYSTEM_PROMPT},
|
||||
{"role": "user", "content": prompt_text},
|
||||
],
|
||||
},
|
||||
timeout=DEEPSEEK_TIMEOUT,
|
||||
deepseek_api_key,
|
||||
deepseek_model,
|
||||
prompt_text,
|
||||
ws_logger,
|
||||
)
|
||||
|
||||
duration = time.time() - start
|
||||
ws_logger.info(
|
||||
"DeepSeek API call completed status=%s duration=%.2fs",
|
||||
response.status_code,
|
||||
"DeepSeek API call completed duration=%.2fs chars=%d",
|
||||
duration,
|
||||
len(reply_text),
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
reply_text = data["choices"][0]["message"]["content"]
|
||||
reply_text = md_to_slack(reply_text)
|
||||
|
||||
if len(reply_text) <= MAX_INLINE_LENGTH:
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
slack-bolt>=1.28.0
|
||||
requests>=2.31.0
|
||||
md2mrkdwn>=0.4.3
|
||||
duckduckgo-search>=8.0.0
|
||||
lxml>=5.0.0
|
||||
|
||||
117
deepseek/bot.py
117
deepseek/bot.py
@@ -166,6 +166,67 @@ TOOL_EXECUTORS = {
|
||||
"web_fetch": execute_web_fetch,
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DSML fallback parser — DeepSeek V4 sometimes leaks raw DSML markup in the
|
||||
# content field instead of returning structured tool_calls.
|
||||
# ---------------------------------------------------------------------------
|
||||
_DSML_INVOKE_RE = re.compile(
|
||||
r'<||DSML||invoke\s+name="([^"]+)">'
|
||||
r'(.*?)'
|
||||
r'<||DSML||invoke>',
|
||||
re.DOTALL,
|
||||
)
|
||||
_DSML_PARAM_RE = re.compile(
|
||||
r'<||DSML||parameter\s+name="([^"]+)"[^>]*>'
|
||||
r'(.*?)'
|
||||
r'||DSML||parameter>',
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def parse_dsml_tool_calls(content: str) -> list[dict] | None:
|
||||
"""Parse raw DSML markup into structured tool_calls.
|
||||
|
||||
Returns a list of tool-call dicts compatible with the OpenAI/DeepSeek
|
||||
tool_calls format, or None if no DSML was detected.
|
||||
"""
|
||||
if "DSML" not in content:
|
||||
return None
|
||||
|
||||
invocations = _DSML_INVOKE_RE.findall(content)
|
||||
if not invocations:
|
||||
return None
|
||||
|
||||
tool_calls = []
|
||||
for i, (fn_name, param_block) in enumerate(invocations):
|
||||
params = {}
|
||||
for pname, pvalue in _DSML_PARAM_RE.findall(param_block):
|
||||
params[pname] = pvalue.strip()
|
||||
tool_calls.append({
|
||||
"id": f"dsml_fallback_{i}",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": fn_name,
|
||||
"arguments": json.dumps(params),
|
||||
},
|
||||
})
|
||||
|
||||
return tool_calls if tool_calls else None
|
||||
|
||||
|
||||
_DSML_BLOCK_RE = re.compile(
|
||||
r'<||DSML||tool_calls>.*?<||DSML||tool_calls>',
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
def _strip_dsml(text: str) -> str:
|
||||
"""Remove any leaked DSML tool-call blocks from text."""
|
||||
if not text or "DSML" not in text:
|
||||
return text
|
||||
return _DSML_BLOCK_RE.sub("", text).strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DeepSeek API client (async)
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -229,7 +290,6 @@ async def chat_with_tools(
|
||||
choice = data["choices"][0]
|
||||
finish_reason = choice["finish_reason"]
|
||||
assistant_msg = choice["message"]
|
||||
messages.append(assistant_msg)
|
||||
|
||||
ws_logger.info(
|
||||
"DeepSeek turn %d/%d finish_reason=%s tokens=%s",
|
||||
@@ -239,8 +299,41 @@ async def chat_with_tools(
|
||||
data.get("usage", {}),
|
||||
)
|
||||
|
||||
if finish_reason == "tool_calls":
|
||||
tool_calls = assistant_msg.get("tool_calls", [])
|
||||
# Determine tool_calls: either from the structured response, or by
|
||||
# parsing leaked DSML markup in the content field.
|
||||
tool_calls = assistant_msg.get("tool_calls")
|
||||
content = assistant_msg.get("content", "") or ""
|
||||
|
||||
if not tool_calls and finish_reason == "stop":
|
||||
parsed = parse_dsml_tool_calls(content)
|
||||
if parsed:
|
||||
ws_logger.info(
|
||||
"Detected %d DSML tool call(s) in content (fallback parser)",
|
||||
len(parsed),
|
||||
)
|
||||
tool_calls = parsed
|
||||
content = None
|
||||
finish_reason = "tool_calls"
|
||||
|
||||
# When tool_calls are present, strip any DSML that leaked into content
|
||||
if tool_calls and content and "DSML" in content:
|
||||
ws_logger.info("Stripping leaked DSML from assistant content")
|
||||
content = None
|
||||
|
||||
# Build stored message for round-tripping back to the API
|
||||
stored_msg: dict = {"role": "assistant"}
|
||||
if assistant_msg.get("reasoning_content") is not None:
|
||||
stored_msg["reasoning_content"] = assistant_msg["reasoning_content"]
|
||||
elif "reasoning_content" in assistant_msg:
|
||||
stored_msg["reasoning_content"] = ""
|
||||
if content:
|
||||
stored_msg["content"] = content
|
||||
if tool_calls:
|
||||
stored_msg["tool_calls"] = tool_calls
|
||||
|
||||
messages.append(stored_msg)
|
||||
|
||||
if finish_reason == "tool_calls" and tool_calls:
|
||||
ws_logger.info("DeepSeek requested %d tool call(s)", len(tool_calls))
|
||||
|
||||
for tc in tool_calls:
|
||||
@@ -267,21 +360,29 @@ async def chat_with_tools(
|
||||
"content": result,
|
||||
})
|
||||
|
||||
# Continue loop — DeepSeek will process tool results in next turn
|
||||
continue
|
||||
|
||||
elif finish_reason == "stop":
|
||||
return assistant_msg.get("content", "")
|
||||
return _strip_dsml(content)
|
||||
|
||||
else:
|
||||
ws_logger.warning("Unexpected finish_reason: %s", finish_reason)
|
||||
return assistant_msg.get("content", "")
|
||||
return _strip_dsml(content)
|
||||
|
||||
# Tool turns exhausted — make one final call without tool definitions so
|
||||
# DeepSeek is forced to produce a textual answer from the results collected.
|
||||
ws_logger.info("Tool turns exhausted; making final call without tools")
|
||||
messages.append({
|
||||
"role": "user",
|
||||
"content": (
|
||||
"Please provide your answer now based on the tool results above. "
|
||||
"Do not attempt any more tool calls. Summarize what you found."
|
||||
),
|
||||
})
|
||||
data = await call_deepseek(api_url, api_key, model, messages, tools=None)
|
||||
return data["choices"][0]["message"].get("content", "") or (
|
||||
final = data["choices"][0]["message"].get("content", "") or ""
|
||||
final = _strip_dsml(final)
|
||||
return final or (
|
||||
"I wasn't able to complete the request within the allowed number of steps. "
|
||||
"Please try again or simplify your question."
|
||||
)
|
||||
@@ -339,7 +440,7 @@ def make_app(ws):
|
||||
ws_name = ws["name"]
|
||||
deepseek_api_key = ws["deepseek_api_key"]
|
||||
deepseek_api_url = ws.get("deepseek_api_url", "https://api.deepseek.com/chat/completions")
|
||||
deepseek_model = ws.get("deepseek_model", "deepseek-chat")
|
||||
deepseek_model = ws.get("deepseek_model", "deepseek-v4-pro")
|
||||
|
||||
ws_logger = logging.LoggerAdapter(logger, {"workspace": ws_name})
|
||||
|
||||
|
||||
@@ -3,3 +3,4 @@ aiohttp>=3.9.0
|
||||
httpx>=0.28.1
|
||||
md2mrkdwn>=0.4.3
|
||||
duckduckgo-search>=8.0.0
|
||||
lxml>=5.0.0
|
||||
|
||||
Reference in New Issue
Block a user