Upload files to "/"

This commit is contained in:
2026-04-05 11:47:39 +00:00
parent 09997ffd64
commit dafaa490be
4 changed files with 48 additions and 30 deletions
+2 -2
View File
@@ -28,13 +28,13 @@ Single-file pipelines that run inside Open WebUI's pipelines container. The flow
| diagram | qwen2.5-coder:14b | Mermaid output |
| reasoning (FI/EN) | gpt-oss:120b / 20b | Finnish detection via keyword scoring (threshold ≥ 2) |
| image_generation | gpt-oss → SDXL Base | LLM refines prompt, then calls A1111 API |
| uncensored image | Juggernaut XL v9 (no LLM) | Triggered by "uncen" prefix, skips classifier, search, and LLM refinement |
| uncensored image | dolphin-mistral:7b → Juggernaut XL v9 | Triggered by "uncen" prefix; skips classifier and search, and uses an uncensored LLM for prompt refinement |
| vision | llama3.2-vision:11b | Only when latest user message has image |
| general | gpt-oss:120b / 20b | |
### Key Design Decisions
- **"uncen" prefix** — highest priority check, bypasses everything (classification, search, vision detection, LLM refinement) and sends the user's text directly to Juggernaut XL v9 with quality tags appended. LLM is skipped entirely to avoid refusal from censored models.
- **"uncen" prefix** — highest-priority check, bypasses everything (classification, search, vision detection) and routes to uncensored image generation. Uses dolphin-mistral:7b (uncensored LLM) for prompt refinement instead of gpt-oss, which refuses NSFW content. Falls back to raw prompt + quality tags if dolphin-mistral is unavailable.
- **Classifier strictness** — "coding" only triggers when user explicitly asks for code output. Discussing IT/tech topics routes to general/reasoning.
- **Finnish/English bilingual** — Finnish detected by scoring FINNISH_INDICATORS. A Finnish instruction is injected into system prompts for all categories.
- **Search is aggressive** — heuristic layer ensures search triggers for factual questions, even if AI classifier says no.
+10 -8
View File
@@ -24,7 +24,7 @@ An intelligent prompt classification and routing pipeline for [Open WebUI](https
| reasoning (FI) | gpt-oss:120b | gpt-oss:20b | Analysis, comparison, strategy (Finnish) |
| reasoning (EN) | gpt-oss:120b | gpt-oss:20b | Analysis, comparison, strategy (English) |
| image generation | gpt-oss:120b + SDXL | gpt-oss:20b + SDXL | "generate an image", "luo kuva" |
| uncensored image | Juggernaut XL v9 | Juggernaut XL v9 | Prompt starts with `uncen` |
| uncensored image | dolphin-mistral:7b + Juggernaut XL v9 | dolphin-mistral:7b + Juggernaut XL v9 | Prompt starts with `uncen` |
| vision | llama3.2-vision:11b | llama3.2-vision:11b | User uploads an image |
| general | gpt-oss:120b | gpt-oss:20b | Everything else |
@@ -43,6 +43,7 @@ Two pipeline variants are provided:
ollama pull qwen2.5-coder:14b
ollama pull gpt-oss:120b # or gpt-oss:20b for the lighter variant
ollama pull llama3.2-vision:11b
ollama pull dolphin-mistral:7b # uncensored model for image prompt refinement
```
- **Brave Search API key** (free tier: https://brave.com/search/api/)
@@ -221,7 +222,7 @@ uncen a beautiful sunset over the ocean
uncen portrait of a warrior in golden armor
```
The `uncen` prefix is stripped and the user's text is sent directly to Stable Diffusion with quality tags appended — **no LLM refinement** (to avoid model refusal). The pipeline switches the SD checkpoint via the API automatically.
The `uncen` prefix is stripped and the prompt is refined by **dolphin-mistral:7b** (an uncensored LLM that won't refuse any content) instead of gpt-oss. The pipeline switches the SD checkpoint to Juggernaut XL v9 automatically. If dolphin-mistral is unavailable, it falls back to sending the user's text directly with quality tags appended.
### How it works
@@ -233,11 +234,12 @@ The `uncen` prefix is stripped and the user's text is sent directly to Stable Di
5. SD checkpoint is unloaded from VRAM and page cache is dropped
**Uncensored mode:**
1. `uncen` prefix is stripped, quality tags appended directly (no LLM call)
2. Ollama models are unloaded from VRAM
3. SD checkpoint is switched to Juggernaut XL v9
4. Image is generated, compressed PNG→JPEG, and streamed in 4KB chunks
5. SD checkpoint is unloaded from VRAM and page cache is dropped
1. `uncen` prefix is stripped
2. dolphin-mistral:7b refines the prompt into optimized SD tags (no refusal)
3. Ollama models are unloaded from VRAM
4. SD checkpoint is switched to Juggernaut XL v9
5. Image is generated, compressed PNG→JPEG, and streamed in 4KB chunks
6. SD checkpoint is unloaded from VRAM and page cache is dropped
## VRAM Management
@@ -258,7 +260,7 @@ sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'
```
User Message
├─ "uncen" prefix? ─────────────── → Juggernaut XL v9 (direct, no search)
├─ "uncen" prefix? ─────────────── → dolphin-mistral:7b (refine) → Juggernaut XL v9
├─ Image uploaded? ──────────────── → llama3.2-vision:11b
+18 -10
View File
@@ -32,6 +32,7 @@ from io import BytesIO
# ---------------------------------------------------------------------------
OLLAMA_URL = "http://ollama:11434"
CLASSIFIER_MODEL = "qwen2.5:7b"
UNCENSORED_MODEL = "dolphin-mistral:7b"
BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
@@ -614,19 +615,22 @@ def _raw_sd_prompt(user_message: str) -> str:
def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] = None, uncensored: bool = False) -> str:
"""Use the LLM to convert a user request into an optimized SD prompt.
Includes conversation history so the model understands context like 'generate an image of that'.
For uncensored mode, skips LLM entirely to avoid refusal.
For uncensored mode, uses dolphin-mistral (no refusal). Falls back to raw prompt on failure.
"""
if uncensored:
return _raw_sd_prompt(user_message)
try:
# Pick model and system prompt based on mode
if uncensored:
model = UNCENSORED_MODEL
sys_key = "image_generation_uncensored"
else:
model = MODELS["image_generation"]
sys_key = "image_generation"
# Build context from recent conversation history
sys_key = "image_generation_uncensored" if uncensored else "image_generation"
context_messages = [{"role": "system", "content": SYSTEM_PROMPTS[sys_key]}]
if messages:
# Include last few exchanges for context (trim to avoid blowing up the context)
recent = [m for m in messages if m.get("role") in ("user", "assistant") and m.get("content")]
for msg in recent[-6:]: # Last 3 exchanges
for msg in recent[-6:]:
content = msg["content"]
if isinstance(content, list):
content = " ".join(p.get("text", "") for p in content if isinstance(p, dict))
@@ -635,7 +639,7 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
context_messages.append({"role": "user", "content": user_message[:500]})
payload = {
"model": MODELS["image_generation"],
"model": model,
"messages": context_messages,
"stream": False,
"options": {"temperature": 0.7, "num_ctx": 4096},
@@ -650,8 +654,8 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
return refined
except Exception as e:
print(f"[Router] SD prompt refinement failed: {e}")
# Fallback: use the user message directly
return user_message
# Fallback: raw prompt with quality tags
return _raw_sd_prompt(user_message)
def _negative_prompt() -> str:
@@ -922,6 +926,10 @@ class Pipeline:
target_model = MODELS.get(category, MODELS["general"])
system_prompt = SYSTEM_PROMPTS.get(category, SYSTEM_PROMPTS["general"])
# Override display model for uncensored mode
if uncensored:
target_model = f"{UNCENSORED_MODEL}{SD_MODEL_UNCENSORED}"
# Inject language instruction — always respond in the user's language
if detect_finnish(user_message) and category not in ("reasoning_fi", "image_generation"):
system_prompt = (
+18 -10
View File
@@ -32,6 +32,7 @@ from io import BytesIO
# ---------------------------------------------------------------------------
OLLAMA_URL = "http://ollama:11434"
CLASSIFIER_MODEL = "qwen2.5:7b"
UNCENSORED_MODEL = "dolphin-mistral:7b"
BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
@@ -616,19 +617,22 @@ def _raw_sd_prompt(user_message: str) -> str:
def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] = None, uncensored: bool = False) -> str:
"""Use the LLM to convert a user request into an optimized SD prompt.
Includes conversation history so the model understands context like 'generate an image of that'.
For uncensored mode, skips LLM entirely to avoid refusal.
For uncensored mode, uses dolphin-mistral (no refusal). Falls back to raw prompt on failure.
"""
if uncensored:
return _raw_sd_prompt(user_message)
try:
# Pick model and system prompt based on mode
if uncensored:
model = UNCENSORED_MODEL
sys_key = "image_generation_uncensored"
else:
model = MODELS["image_generation"]
sys_key = "image_generation"
# Build context from recent conversation history
sys_key = "image_generation_uncensored" if uncensored else "image_generation"
context_messages = [{"role": "system", "content": SYSTEM_PROMPTS[sys_key]}]
if messages:
# Include last few exchanges for context (trim to avoid blowing up the context)
recent = [m for m in messages if m.get("role") in ("user", "assistant") and m.get("content")]
for msg in recent[-6:]: # Last 3 exchanges
for msg in recent[-6:]:
content = msg["content"]
if isinstance(content, list):
content = " ".join(p.get("text", "") for p in content if isinstance(p, dict))
@@ -637,7 +641,7 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
context_messages.append({"role": "user", "content": user_message[:500]})
payload = {
"model": MODELS["image_generation"],
"model": model,
"messages": context_messages,
"stream": False,
"options": {"temperature": 0.7, "num_ctx": 4096},
@@ -652,8 +656,8 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
return refined
except Exception as e:
print(f"[Router] SD prompt refinement failed: {e}")
# Fallback: use the user message directly
return user_message
# Fallback: raw prompt with quality tags
return _raw_sd_prompt(user_message)
def _negative_prompt() -> str:
@@ -924,6 +928,10 @@ class Pipeline:
target_model = MODELS.get(category, MODELS["general"])
system_prompt = SYSTEM_PROMPTS.get(category, SYSTEM_PROMPTS["general"])
# Override display model for uncensored mode
if uncensored:
target_model = f"{UNCENSORED_MODEL}{SD_MODEL_UNCENSORED}"
# Inject language instruction — always respond in the user's language
if detect_finnish(user_message) and category not in ("reasoning_fi", "image_generation"):
system_prompt = (