Upload files to "/"

2026-04-05 11:47:39 +00:00
parent 09997ffd64
commit dafaa490be
4 changed files with 48 additions and 30 deletions
@@ -28,13 +28,13 @@ Single-file pipelines that run inside Open WebUI's pipelines container. The flow
 | diagram | qwen2.5-coder:14b | Mermaid output |
 | reasoning (FI/EN) | gpt-oss:120b / 20b | Finnish detection via keyword scoring (threshold ≥ 2) |
 | image_generation | gpt-oss → SDXL Base | LLM refines prompt, then calls A1111 API |
-| uncensored image | Juggernaut XL v9 (no LLM) | Triggered by "uncen" prefix, skips classifier, search, and LLM refinement |
+| uncensored image | dolphin-mistral:7b → Juggernaut XL v9 | Triggered by "uncen" prefix; skips classifier and search, and uses an uncensored LLM for prompt refinement |
 | vision | llama3.2-vision:11b | Only when latest user message has image |
 | general | gpt-oss:120b / 20b | |
 ### Key Design Decisions
- **"uncen" prefix** — highest priority check, bypasses everything (classification, search, vision detection, LLM refinement) and sends the user's text directly to Juggernaut XL v9 with quality tags appended. LLM is skipped entirely to avoid refusal from censored models.
+- **"uncen" prefix** — highest-priority check, bypasses everything (classification, search, vision detection) and routes to uncensored image generation. Uses dolphin-mistral:7b (uncensored LLM) for prompt refinement instead of gpt-oss, which refuses NSFW content. Falls back to raw prompt + quality tags if dolphin-mistral is unavailable.
 - **Classifier strictness** — "coding" only triggers when user explicitly asks for code output. Discussing IT/tech topics routes to general/reasoning.
 - **Finnish/English bilingual** — Finnish detected by scoring FINNISH_INDICATORS. A Finnish instruction is injected into system prompts for all categories.
 - **Search is aggressive** — heuristic layer ensures search triggers for factual questions, even if AI classifier says no.
@@ -24,7 +24,7 @@ An intelligent prompt classification and routing pipeline for [Open WebUI](https
 | reasoning (FI) | gpt-oss:120b | gpt-oss:20b | Analysis, comparison, strategy (Finnish) |
 | reasoning (EN) | gpt-oss:120b | gpt-oss:20b | Analysis, comparison, strategy (English) |
 | image generation | gpt-oss:120b + SDXL | gpt-oss:20b + SDXL | "generate an image", "luo kuva" |
-| uncensored image | Juggernaut XL v9 | Juggernaut XL v9 | Prompt starts with `uncen` |
+| uncensored image | dolphin-mistral:7b + Juggernaut XL v9 | dolphin-mistral:7b + Juggernaut XL v9 | Prompt starts with `uncen` |
 | vision | llama3.2-vision:11b | llama3.2-vision:11b | User uploads an image |
 | general | gpt-oss:120b | gpt-oss:20b | Everything else |
@@ -43,6 +43,7 @@ Two pipeline variants are provided:
  ollama pull qwen2.5-coder:14b
  ollama pull gpt-oss:120b    # or gpt-oss:20b for the lighter variant
  ollama pull llama3.2-vision:11b
+ ollama pull dolphin-mistral:7b   # uncensored model for image prompt refinement
  ```
 - **Brave Search API key** (free tier: https://brave.com/search/api/)
@@ -221,7 +222,7 @@ uncen a beautiful sunset over the ocean
 uncen portrait of a warrior in golden armor
 ```
-The `uncen` prefix is stripped and the user's text is sent directly to Stable Diffusion with quality tags appended — **no LLM refinement** (to avoid model refusal). The pipeline switches the SD checkpoint via the API automatically.
+The `uncen` prefix is stripped and the prompt is refined by **dolphin-mistral:7b** (an uncensored LLM that won't refuse any content) instead of gpt-oss. The pipeline switches the SD checkpoint to Juggernaut XL v9 automatically. If dolphin-mistral is unavailable, the pipeline falls back to sending the user's text directly to Stable Diffusion with quality tags appended.
 ### How it works
@@ -233,11 +234,12 @@ The `uncen` prefix is stripped and the user's text is sent directly to Stable Di
 5. SD checkpoint is unloaded from VRAM and page cache is dropped
 **Uncensored mode:**
-1. `uncen` prefix is stripped, quality tags appended directly (no LLM call)
+1. `uncen` prefix is stripped
-2. Ollama models are unloaded from VRAM
+2. dolphin-mistral:7b refines the prompt into optimized SD tags (no refusal)
-3. SD checkpoint is switched to Juggernaut XL v9
+3. Ollama models are unloaded from VRAM
-4. Image is generated, compressed PNG→JPEG, and streamed in 4KB chunks
+4. SD checkpoint is switched to Juggernaut XL v9
-5. SD checkpoint is unloaded from VRAM and page cache is dropped
+5. Image is generated, compressed PNG→JPEG, and streamed in 4KB chunks
+6. SD checkpoint is unloaded from VRAM and page cache is dropped
 ## VRAM Management
@@ -258,7 +260,7 @@ sudo sh -c 'sync; echo 3 > /proc/sys/vm/drop_caches'
 ```
 User Message
    │
-    ├─ "uncen" prefix? ─────────────── → Juggernaut XL v9 (direct, no search)
+    ├─ "uncen" prefix? ─────────────── → dolphin-mistral:7b (refine) → Juggernaut XL v9
    │
    ├─ Image uploaded? ──────────────── → llama3.2-vision:11b
    │
@@ -32,6 +32,7 @@ from io import BytesIO
 # ---------------------------------------------------------------------------
 OLLAMA_URL = "http://ollama:11434"
 CLASSIFIER_MODEL = "qwen2.5:7b"
 UNCENSORED_MODEL = "dolphin-mistral:7b"
 BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
 BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
@@ -614,19 +615,22 @@ def _raw_sd_prompt(user_message: str) -> str:
 def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] = None, uncensored: bool = False) -> str:
    """Use the LLM to convert a user request into an optimized SD prompt.
    Includes conversation history so the model understands context like 'generate an image of that'.
-    For uncensored mode, skips LLM entirely to avoid refusal.
+    For uncensored mode, uses dolphin-mistral (no refusal). Falls back to raw prompt on failure.
    """
    if uncensored:
        return _raw_sd_prompt(user_message)
    try:
        # Pick model and system prompt based on mode
        if uncensored:
            model = UNCENSORED_MODEL
            sys_key = "image_generation_uncensored"
        else:
            model = MODELS["image_generation"]
            sys_key = "image_generation"
        # Build context from recent conversation history
        sys_key = "image_generation_uncensored" if uncensored else "image_generation"
        context_messages = [{"role": "system", "content": SYSTEM_PROMPTS[sys_key]}]
        if messages:
            # Include last few exchanges for context (trim to avoid blowing up the context)
            recent = [m for m in messages if m.get("role") in ("user", "assistant") and m.get("content")]
-            for msg in recent[-6:]:  # Last 3 exchanges
+            for msg in recent[-6:]:
                content = msg["content"]
                if isinstance(content, list):
                    content = " ".join(p.get("text", "") for p in content if isinstance(p, dict))
@@ -635,7 +639,7 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
            context_messages.append({"role": "user", "content": user_message[:500]})
        payload = {
-            "model": MODELS["image_generation"],
+            "model": model,
            "messages": context_messages,
            "stream": False,
            "options": {"temperature": 0.7, "num_ctx": 4096},
@@ -650,8 +654,8 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
        return refined
    except Exception as e:
        print(f"[Router] SD prompt refinement failed: {e}")
-        # Fallback: use the user message directly
+        # Fallback: raw prompt with quality tags
-        return user_message
+        return _raw_sd_prompt(user_message)
 def _negative_prompt() -> str:
@@ -922,6 +926,10 @@ class Pipeline:
        target_model = MODELS.get(category, MODELS["general"])
        system_prompt = SYSTEM_PROMPTS.get(category, SYSTEM_PROMPTS["general"])
        # Override display model for uncensored mode
        if uncensored:
            target_model = f"{UNCENSORED_MODEL} → {SD_MODEL_UNCENSORED}"
        # Inject language instruction — always respond in the user's language
        if detect_finnish(user_message) and category not in ("reasoning_fi", "image_generation"):
            system_prompt = (
@@ -32,6 +32,7 @@ from io import BytesIO
 # ---------------------------------------------------------------------------
 OLLAMA_URL = "http://ollama:11434"
 CLASSIFIER_MODEL = "qwen2.5:7b"
 UNCENSORED_MODEL = "dolphin-mistral:7b"
 BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
 BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search"
@@ -616,19 +617,22 @@ def _raw_sd_prompt(user_message: str) -> str:
 def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] = None, uncensored: bool = False) -> str:
    """Use the LLM to convert a user request into an optimized SD prompt.
    Includes conversation history so the model understands context like 'generate an image of that'.
-    For uncensored mode, skips LLM entirely to avoid refusal.
+    For uncensored mode, uses dolphin-mistral (no refusal). Falls back to raw prompt on failure.
    """
    if uncensored:
        return _raw_sd_prompt(user_message)
    try:
        # Pick model and system prompt based on mode
        if uncensored:
            model = UNCENSORED_MODEL
            sys_key = "image_generation_uncensored"
        else:
            model = MODELS["image_generation"]
            sys_key = "image_generation"
        # Build context from recent conversation history
        sys_key = "image_generation_uncensored" if uncensored else "image_generation"
        context_messages = [{"role": "system", "content": SYSTEM_PROMPTS[sys_key]}]
        if messages:
            # Include last few exchanges for context (trim to avoid blowing up the context)
            recent = [m for m in messages if m.get("role") in ("user", "assistant") and m.get("content")]
-            for msg in recent[-6:]:  # Last 3 exchanges
+            for msg in recent[-6:]:
                content = msg["content"]
                if isinstance(content, list):
                    content = " ".join(p.get("text", "") for p in content if isinstance(p, dict))
@@ -637,7 +641,7 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
            context_messages.append({"role": "user", "content": user_message[:500]})
        payload = {
-            "model": MODELS["image_generation"],
+            "model": model,
            "messages": context_messages,
            "stream": False,
            "options": {"temperature": 0.7, "num_ctx": 4096},
@@ -652,8 +656,8 @@ def _refine_sd_prompt(user_message: str, ollama_url: str, messages: List[dict] =
        return refined
    except Exception as e:
        print(f"[Router] SD prompt refinement failed: {e}")
-        # Fallback: use the user message directly
+        # Fallback: raw prompt with quality tags
-        return user_message
+        return _raw_sd_prompt(user_message)
 def _negative_prompt() -> str:
@@ -924,6 +928,10 @@ class Pipeline:
        target_model = MODELS.get(category, MODELS["general"])
        system_prompt = SYSTEM_PROMPTS.get(category, SYSTEM_PROMPTS["general"])
        # Override display model for uncensored mode
        if uncensored:
            target_model = f"{UNCENSORED_MODEL} → {SD_MODEL_UNCENSORED}"
        # Inject language instruction — always respond in the user's language
        if detect_finnish(user_message) and category not in ("reasoning_fi", "image_generation"):
            system_prompt = (