Hook AutoGen's underlying LLM call (将autogen大模型调用底层hook掉)

parent 804599bbc3
commit 28119e343c
@@ -211,7 +211,7 @@ ALLOW_RESET_CONFIG = False


 # 在使用AutoGen插件时,是否使用Docker容器运行代码
-AUTOGEN_USE_DOCKER = True
+AUTOGEN_USE_DOCKER = False


 # 临时的上传文件夹位置,请勿修改
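Later hunks in this commit gate the `docker` import on this new switch via `get_conf("AUTOGEN_USE_DOCKER")`. As a rough, non-authoritative sketch (not part of the commit), this is how such a flag is typically threaded into pyautogen's code-execution settings; the agent name and work_dir are made-up placeholders, and `get_conf` is the project's own config reader as used elsewhere in this diff:

```python
# Illustrative only: wiring AUTOGEN_USE_DOCKER into an AutoGen agent.
from toolbox import get_conf          # gpt_academic's config reader (used in later hunks)
from autogen import UserProxyAgent    # pyautogen agent that executes generated code

use_docker = get_conf("AUTOGEN_USE_DOCKER")   # False by default after this commit

user_proxy = UserProxyAgent(
    name="user_proxy",                        # placeholder name
    human_input_mode="NEVER",
    code_execution_config={
        "work_dir": "autogen_workdir",        # placeholder working directory
        "use_docker": use_docker,             # containerize generated-code execution only when enabled
    },
)
```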
@@ -539,18 +539,15 @@ def get_crazy_functions():
     except:
         print('Load function plugin failed')

-    try:
-        from crazy_functions.多智能体 import 多智能体终端
-        function_plugins.update({
-            "多智能体终端(微软AutoGen)": {
-                "Group": "智能体",
-                "Color": "stop",
-                "AsButton": True,
-                "Function": HotReload(多智能体终端)
-            }
-        })
-    except:
-        print('Load function plugin failed')
+    from crazy_functions.多智能体 import 多智能体终端
+    function_plugins.update({
+        "多智能体终端(微软AutoGen)": {
+            "Group": "智能体",
+            "Color": "stop",
+            "AsButton": True,
+            "Function": HotReload(多智能体终端)
+        }
+    })

     # try:
     #     from crazy_functions.chatglm微调工具 import 微调数据集生成
@@ -2,8 +2,6 @@ from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc,
 from toolbox import report_execption, get_log_folder, update_ui_lastest_msg, Singleton
 from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
 from crazy_functions.agent_fns.general import AutoGenGeneral
-import time
-from autogen import AssistantAgent, UserProxyAgent



@@ -1,584 +0,0 @@
-from time import sleep
-import logging
-import time
-from typing import List, Optional, Dict, Callable, Union
-import sys
-import shutil
-import numpy as np
-from flaml import tune, BlendSearch
-from flaml.tune.space import is_constant
-from flaml.automl.logger import logger_formatter
-from collections import defaultdict
-
-try:
-    import openai
-    from openai.error import (
-        ServiceUnavailableError,
-        RateLimitError,
-        APIError,
-        InvalidRequestError,
-        APIConnectionError,
-        Timeout,
-        AuthenticationError,
-    )
-    from openai import Completion as openai_Completion
-    import diskcache
-
-    ERROR = None
-except ImportError:
-    ERROR = ImportError("please install openai and diskcache to use the autogen.oai subpackage.")
-    openai_Completion = object
-logger = logging.getLogger(__name__)
-if not logger.handlers:
-    # Add the console handler.
-    _ch = logging.StreamHandler(stream=sys.stdout)
-    _ch.setFormatter(logger_formatter)
-    logger.addHandler(_ch)
-
-
-class Completion(openai_Completion):
-    """A class for OpenAI completion API.
-
-    It also supports: ChatCompletion, Azure OpenAI API.
-    """
-
-    # set of models that support chat completion
-    chat_models = {
-        "gpt-3.5-turbo",
-        "gpt-3.5-turbo-0301",  # deprecate in Sep
-        "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-16k",
-        "gpt-3.5-turbo-16k-0613",
-        "gpt-35-turbo",
-        "gpt-35-turbo-16k",
-        "gpt-4",
-        "gpt-4-32k",
-        "gpt-4-32k-0314",  # deprecate in Sep
-        "gpt-4-0314",  # deprecate in Sep
-        "gpt-4-0613",
-        "gpt-4-32k-0613",
-    }
-
-    # price per 1k tokens
-    price1K = {
-        "text-ada-001": 0.0004,
-        "text-babbage-001": 0.0005,
-        "text-curie-001": 0.002,
-        "code-cushman-001": 0.024,
-        "code-davinci-002": 0.1,
-        "text-davinci-002": 0.02,
-        "text-davinci-003": 0.02,
-        "gpt-3.5-turbo": (0.0015, 0.002),
-        "gpt-3.5-turbo-instruct": (0.0015, 0.002),
-        "gpt-3.5-turbo-0301": (0.0015, 0.002),  # deprecate in Sep
-        "gpt-3.5-turbo-0613": (0.0015, 0.002),
-        "gpt-3.5-turbo-16k": (0.003, 0.004),
-        "gpt-3.5-turbo-16k-0613": (0.003, 0.004),
-        "gpt-35-turbo": (0.0015, 0.002),
-        "gpt-35-turbo-16k": (0.003, 0.004),
-        "gpt-35-turbo-instruct": (0.0015, 0.002),
-        "gpt-4": (0.03, 0.06),
-        "gpt-4-32k": (0.06, 0.12),
-        "gpt-4-0314": (0.03, 0.06),  # deprecate in Sep
-        "gpt-4-32k-0314": (0.06, 0.12),  # deprecate in Sep
-        "gpt-4-0613": (0.03, 0.06),
-        "gpt-4-32k-0613": (0.06, 0.12),
-    }
-
-    default_search_space = {
-        "model": tune.choice(
-            [
-                "text-ada-001",
-                "text-babbage-001",
-                "text-davinci-003",
-                "gpt-3.5-turbo",
-                "gpt-4",
-            ]
-        ),
-        "temperature_or_top_p": tune.choice(
-            [
-                {"temperature": tune.uniform(0, 2)},
-                {"top_p": tune.uniform(0, 1)},
-            ]
-        ),
-        "max_tokens": tune.lograndint(50, 1000),
-        "n": tune.randint(1, 100),
-        "prompt": "{prompt}",
-    }
-
-    seed = 41
-    cache_path = f".cache/{seed}"
-    # retry after this many seconds
-    retry_wait_time = 10
-    # fail a request after hitting RateLimitError for this many seconds
-    max_retry_period = 120
-    # time out for request to openai server
-    request_timeout = 60
-
-    openai_completion_class = not ERROR and openai.Completion
-    _total_cost = 0
-    optimization_budget = None
-
-    _history_dict = _count_create = None
-
-    @classmethod
-    def set_cache(cls, seed: Optional[int] = 41, cache_path_root: Optional[str] = ".cache"):
-        """Set cache path.
-
-        Args:
-            seed (int, Optional): The integer identifier for the pseudo seed.
-                Results corresponding to different seeds will be cached in different places.
-            cache_path (str, Optional): The root path for the cache.
-                The complete cache path will be {cache_path}/{seed}.
-        """
-        cls.seed = seed
-        cls.cache_path = f"{cache_path_root}/{seed}"
-
-    @classmethod
-    def clear_cache(cls, seed: Optional[int] = None, cache_path_root: Optional[str] = ".cache"):
-        """Clear cache.
-
-        Args:
-            seed (int, Optional): The integer identifier for the pseudo seed.
-                If omitted, all caches under cache_path_root will be cleared.
-            cache_path (str, Optional): The root path for the cache.
-                The complete cache path will be {cache_path}/{seed}.
-        """
-        if seed is None:
-            shutil.rmtree(cache_path_root, ignore_errors=True)
-            return
-        with diskcache.Cache(f"{cache_path_root}/{seed}") as cache:
-            cache.clear()
-
-    @classmethod
-    def _book_keeping(cls, config: Dict, response):
-        """Book keeping for the created completions."""
-        if response != -1 and "cost" not in response:
-            response["cost"] = cls.cost(response)
-        if cls._history_dict is None:
-            return
-        if cls._history_compact:
-            value = {
-                "created_at": [],
-                "cost": [],
-                "token_count": [],
-            }
-            if "messages" in config:
-                messages = config["messages"]
-                if len(messages) > 1 and messages[-1]["role"] != "assistant":
-                    existing_key = get_key(messages[:-1])
-                    value = cls._history_dict.pop(existing_key, value)
-                key = get_key(messages + [choice["message"] for choice in response["choices"]])
-            else:
-                key = get_key([config["prompt"]] + [choice.get("text") for choice in response["choices"]])
-            value["created_at"].append(cls._count_create)
-            value["cost"].append(response["cost"])
-            value["token_count"].append(
-                {
-                    "model": response["model"],
-                    "prompt_tokens": response["usage"]["prompt_tokens"],
-                    "completion_tokens": response["usage"].get("completion_tokens", 0),
-                    "total_tokens": response["usage"]["total_tokens"],
-                }
-            )
-            cls._history_dict[key] = value
-            cls._count_create += 1
-            return
-        cls._history_dict[cls._count_create] = {
-            "request": config,
-            "response": response.to_dict_recursive(),
-        }
-        cls._count_create += 1
-
-    @classmethod
-    def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
-        """Get the response from the openai api call.
-
-        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
-        """
-        config = config.copy()
-
-
-    @classmethod
-    def _get_max_valid_n(cls, key, max_tokens):
-        # find the max value in max_valid_n_per_max_tokens
-        # whose key is equal or larger than max_tokens
-        return max(
-            (value for k, value in cls._max_valid_n_per_max_tokens.get(key, {}).items() if k >= max_tokens),
-            default=1,
-        )
-
-    @classmethod
-    def _get_min_invalid_n(cls, key, max_tokens):
-        # find the min value in min_invalid_n_per_max_tokens
-        # whose key is equal or smaller than max_tokens
-        return min(
-            (value for k, value in cls._min_invalid_n_per_max_tokens.get(key, {}).items() if k <= max_tokens),
-            default=None,
-        )
-
-    @classmethod
-    def _get_region_key(cls, config):
-        # get a key for the valid/invalid region corresponding to the given config
-        config = cls._pop_subspace(config, always_copy=False)
-        return (
-            config["model"],
-            config.get("prompt", config.get("messages")),
-            config.get("stop"),
-        )
-
-    @classmethod
-    def _update_invalid_n(cls, prune, region_key, max_tokens, num_completions):
-        if prune:
-            # update invalid n and prune this config
-            cls._min_invalid_n_per_max_tokens[region_key] = invalid_n = cls._min_invalid_n_per_max_tokens.get(
-                region_key, {}
-            )
-            invalid_n[max_tokens] = min(num_completions, invalid_n.get(max_tokens, np.inf))
-
-    @classmethod
-    def _pop_subspace(cls, config, always_copy=True):
-        if "subspace" in config:
-            config = config.copy()
-            config.update(config.pop("subspace"))
-        return config.copy() if always_copy else config
-
-    @classmethod
-    def _get_params_for_create(cls, config: Dict) -> Dict:
-        """Get the params for the openai api call from a config in the search space."""
-        params = cls._pop_subspace(config)
-        if cls._prompts:
-            params["prompt"] = cls._prompts[config["prompt"]]
-        else:
-            params["messages"] = cls._messages[config["messages"]]
-        if "stop" in params:
-            params["stop"] = cls._stops and cls._stops[params["stop"]]
-        temperature_or_top_p = params.pop("temperature_or_top_p", None)
-        if temperature_or_top_p:
-            params.update(temperature_or_top_p)
-        if cls._config_list and "config_list" not in params:
-            params["config_list"] = cls._config_list
-        return params
-
-    @classmethod
-    def create(
-        cls,
-        context: Optional[Dict] = None,
-        use_cache: Optional[bool] = True,
-        config_list: Optional[List[Dict]] = None,
-        filter_func: Optional[Callable[[Dict, Dict, Dict], bool]] = None,
-        raise_on_ratelimit_or_timeout: Optional[bool] = True,
-        allow_format_str_template: Optional[bool] = False,
-        **config,
-    ):
-        """Make a completion for a given context.
-
-        Args:
-            context (Dict, Optional): The context to instantiate the prompt.
-                It needs to contain keys that are used by the prompt template or the filter function.
-                E.g., `prompt="Complete the following sentence: {prefix}, context={"prefix": "Today I feel"}`.
-                The actual prompt will be:
-                "Complete the following sentence: Today I feel".
-                More examples can be found at [templating](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#templating).
-            use_cache (bool, Optional): Whether to use cached responses.
-            config_list (List, Optional): List of configurations for the completion to try.
-                The first one that does not raise an error will be used.
-                Only the differences from the default config need to be provided.
-                E.g.,
-
-        ```python
-        response = oai.Completion.create(
-            config_list=[
-                {
-                    "model": "gpt-4",
-                    "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
-                    "api_type": "azure",
-                    "api_base": os.environ.get("AZURE_OPENAI_API_BASE"),
-                    "api_version": "2023-03-15-preview",
-                },
-                {
-                    "model": "gpt-3.5-turbo",
-                    "api_key": os.environ.get("OPENAI_API_KEY"),
-                    "api_type": "open_ai",
-                    "api_base": "https://api.openai.com/v1",
-                },
-                {
-                    "model": "llama-7B",
-                    "api_base": "http://127.0.0.1:8080",
-                    "api_type": "open_ai",
-                }
-            ],
-            prompt="Hi",
-        )
-        ```
-
-            filter_func (Callable, Optional): A function that takes in the context, the config and the response and returns a boolean to indicate whether the response is valid. E.g.,
-
-        ```python
-        def yes_or_no_filter(context, config, response):
-            return context.get("yes_or_no_choice", False) is False or any(
-                text in ["Yes.", "No."] for text in oai.Completion.extract_text(response)
-            )
-        ```
-
-            raise_on_ratelimit_or_timeout (bool, Optional): Whether to raise RateLimitError or Timeout when all configs fail.
-                When set to False, -1 will be returned when all configs fail.
-            allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
-            **config: Configuration for the openai API call. This is used as parameters for calling openai API.
-                The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
-                Besides the parameters for the openai API call, it can also contain:
-                - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
-                - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
-                - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.
-
-        Returns:
-            Responses from OpenAI API, with additional fields.
-                - `cost`: the total cost.
-            When `config_list` is provided, the response will contain a few more fields:
-                - `config_id`: the index of the config in the config_list that is used to generate the response.
-                - `pass_filter`: whether the response passes the filter function. None if no filter is provided.
-        """
-        if ERROR:
-            raise ERROR
-        config_list = [
-            {
-                "model": "llama-7B",
-                "api_base": "http://127.0.0.1:8080",
-                "api_type": "open_ai",
-            }
-        ]
-        last = len(config_list) - 1
-        cost = 0
-        for i, each_config in enumerate(config_list):
-            base_config = config.copy()
-            base_config["allow_format_str_template"] = allow_format_str_template
-            base_config.update(each_config)
-            if i < last and filter_func is None and "max_retry_period" not in base_config:
-                # max_retry_period = 0 to avoid retrying when no filter is given
-                base_config["max_retry_period"] = 0
-            try:
-                response = cls.create(
-                    context,
-                    use_cache,
-                    raise_on_ratelimit_or_timeout=i < last or raise_on_ratelimit_or_timeout,
-                    **base_config,
-                )
-                if response == -1:
-                    return response
-                pass_filter = filter_func is None or filter_func(
-                    context=context, base_config=config, response=response
-                )
-                if pass_filter or i == last:
-                    response["cost"] = cost + response["cost"]
-                    response["config_id"] = i
-                    response["pass_filter"] = pass_filter
-                    return response
-                cost += response["cost"]
-            except (AuthenticationError, RateLimitError, Timeout, InvalidRequestError):
-                logger.debug(f"failed with config {i}", exc_info=1)
-                if i == last:
-                    raise
-
-        params = cls._construct_params(context, config, allow_format_str_template=allow_format_str_template)
-        if not use_cache:
-            return cls._get_response(
-                params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout, use_cache=False
-            )
-        seed = cls.seed
-        if "seed" in params:
-            cls.set_cache(params.pop("seed"))
-        with diskcache.Cache(cls.cache_path) as cls._cache:
-            cls.set_cache(seed)
-            return cls._get_response(params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout)
-
-    @classmethod
-    def instantiate(
-        cls,
-        template: Union[str, None],
-        context: Optional[Dict] = None,
-        allow_format_str_template: Optional[bool] = False,
-    ):
-        if not context or template is None:
-            return template
-        if isinstance(template, str):
-            return template.format(**context) if allow_format_str_template else template
-        return template(context)
-
-    @classmethod
-    def _construct_params(cls, context, config, prompt=None, messages=None, allow_format_str_template=False):
-        params = config.copy()
-        model = config["model"]
-        prompt = config.get("prompt") if prompt is None else prompt
-        messages = config.get("messages") if messages is None else messages
-        # either "prompt" should be in config (for being compatible with non-chat models)
-        # or "messages" should be in config (for tuning chat models only)
-        if prompt is None and (model in cls.chat_models or issubclass(cls, ChatCompletion)):
-            if messages is None:
-                raise ValueError("Either prompt or messages should be in config for chat models.")
-        if prompt is None:
-            params["messages"] = (
-                [
-                    {
-                        **m,
-                        "content": cls.instantiate(m["content"], context, allow_format_str_template),
-                    }
-                    if m.get("content")
-                    else m
-                    for m in messages
-                ]
-                if context
-                else messages
-            )
-        elif model in cls.chat_models or issubclass(cls, ChatCompletion):
-            # convert prompt to messages
-            params["messages"] = [
-                {
-                    "role": "user",
-                    "content": cls.instantiate(prompt, context, allow_format_str_template),
-                },
-            ]
-            params.pop("prompt", None)
-        else:
-            params["prompt"] = cls.instantiate(prompt, context, allow_format_str_template)
-        return params
-
-    @classmethod
-    def extract_text(cls, response: dict) -> List[str]:
-        """Extract the text from a completion or chat response.
-
-        Args:
-            response (dict): The response from OpenAI API.
-
-        Returns:
-            A list of text in the responses.
-        """
-        choices = response["choices"]
-        if "text" in choices[0]:
-            return [choice["text"] for choice in choices]
-        return [choice["message"].get("content", "") for choice in choices]
-
-    @classmethod
-    def extract_text_or_function_call(cls, response: dict) -> List[str]:
-        """Extract the text or function calls from a completion or chat response.
-
-        Args:
-            response (dict): The response from OpenAI API.
-
-        Returns:
-            A list of text or function calls in the responses.
-        """
-        choices = response["choices"]
-        if "text" in choices[0]:
-            return [choice["text"] for choice in choices]
-        return [
-            choice["message"] if "function_call" in choice["message"] else choice["message"].get("content", "")
-            for choice in choices
-        ]
-
-    @classmethod
-    @property
-    def logged_history(cls) -> Dict:
-        """Return the book keeping dictionary."""
-        return cls._history_dict
-
-    @classmethod
-    def print_usage_summary(cls) -> Dict:
-        """Return the usage summary."""
-        if cls._history_dict is None:
-            print("No usage summary available.", flush=True)
-
-        token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
-
-        if not cls._history_compact:
-            source = cls._history_dict.values()
-            total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
-        else:
-            # source = cls._history_dict["token_count"]
-            # total_cost = sum(cls._history_dict['cost'])
-            total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
-            source = (
-                token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
-            )
-
-        for entry in source:
-            if not cls._history_compact:
-                model = entry["response"]["model"]
-                token_data = entry["response"]["usage"]
-            else:
-                model = entry["model"]
-                token_data = entry
-
-            token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
-            token_count_summary[model]["completion_tokens"] += token_data["completion_tokens"]
-            token_count_summary[model]["total_tokens"] += token_data["total_tokens"]
-
-        print(f"Total cost: {total_cost}", flush=True)
-        for model, counts in token_count_summary.items():
-            print(
-                f"Token count summary for model {model}: prompt_tokens: {counts['prompt_tokens']}, completion_tokens: {counts['completion_tokens']}, total_tokens: {counts['total_tokens']}",
-                flush=True,
-            )
-
-    @classmethod
-    def start_logging(
-        cls, history_dict: Optional[Dict] = None, compact: Optional[bool] = True, reset_counter: Optional[bool] = True
-    ):
-        """Start book keeping.
-
-        Args:
-            history_dict (Dict): A dictionary for book keeping.
-                If no provided, a new one will be created.
-            compact (bool): Whether to keep the history dictionary compact.
-                Compact history contains one key per conversation, and the value is a dictionary
-                like:
-        ```python
-        {
-            "create_at": [0, 1],
-            "cost": [0.1, 0.2],
-        }
-        ```
-                where "created_at" is the index of API calls indicating the order of all the calls,
-                and "cost" is the cost of each call. This example shows that the conversation is based
-                on two API calls. The compact format is useful for condensing the history of a conversation.
-                If compact is False, the history dictionary will contain all the API calls: the key
-                is the index of the API call, and the value is a dictionary like:
-        ```python
-        {
-            "request": request_dict,
-            "response": response_dict,
-        }
-        ```
-                where request_dict is the request sent to OpenAI API, and response_dict is the response.
-                For a conversation containing two API calls, the non-compact history dictionary will be like:
-        ```python
-        {
-            0: {
-                "request": request_dict_0,
-                "response": response_dict_0,
-            },
-            1: {
-                "request": request_dict_1,
-                "response": response_dict_1,
-            },
-        ```
-                The first request's messages plus the response is equal to the second request's messages.
-                For a conversation with many turns, the non-compact history dictionary has a quadratic size
-                while the compact history dict has a linear size.
-            reset_counter (bool): whether to reset the counter of the number of API calls.
-        """
-        cls._history_dict = {} if history_dict is None else history_dict
-        cls._history_compact = compact
-        cls._count_create = 0 if reset_counter or cls._count_create is None else cls._count_create

-    @classmethod
-    def stop_logging(cls):
-        """End book keeping."""
-        cls._history_dict = cls._count_create = None
-
-
-class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""
-
-    default_search_space = Completion.default_search_space.copy()
-    default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
-    openai_completion_class = not ERROR and openai.ChatCompletion
@@ -9,17 +9,27 @@ def gpt_academic_generate_oai_reply(
        sender,
        config,
 ):
-    from .bridge_autogen import Completion
     llm_config = self.llm_config if config is None else config
     if llm_config is False:
         return False, None
     if messages is None:
         messages = self._oai_messages[sender]

-    response = Completion.create(
-        context=messages[-1].pop("context", None), messages=self._oai_system_message + messages, **llm_config
+    inputs = messages[-1]['content']
+    history = []
+    for message in messages[:-1]:
+        history.append(message['content'])
+    context=messages[-1].pop("context", None)
+    assert context is None, "预留参数 context 未实现"
+
+    reply = predict_no_ui_long_connection(
+        inputs=inputs,
+        llm_kwargs=llm_config,
+        history=history,
+        sys_prompt=self._oai_system_message[0]['content'],
+        console_slience=True
     )
-    return True, Completion.extract_text_or_function_call(response)[0]
+    return True, reply

 class AutoGenGeneral(PluginMultiprocessManager):
     def gpt_academic_print_override(self, user_proxy, message, sender):
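This hunk is the core of the hook: `gpt_academic_generate_oai_reply` no longer goes through the vendored `bridge_autogen.Completion` (deleted above); it flattens the AutoGen message list into `inputs`/`history` and calls the project's own `predict_no_ui_long_connection`. The diff itself does not show how this function is attached to the agents. The sketch below is one plausible wiring using pyautogen's `ConversableAgent.register_reply`; the import path and the `llm_kwargs` placeholder are assumptions based on the other hunks in this commit, not code from it:

```python
# Sketch only — not shown in this commit. Illustrates how a reply hook such as
# gpt_academic_generate_oai_reply can be registered so that agents never reach
# autogen's built-in OpenAI call path.
from autogen import AssistantAgent, ConversableAgent
from crazy_functions.agent_fns.general import gpt_academic_generate_oai_reply  # assumed module path

llm_kwargs = {"llm_model": "gpt-3.5-turbo", "api_key": "sk-..."}  # placeholder; the real dict comes from the gpt_academic session

assistant = AssistantAgent(
    name="assistant",
    llm_config=llm_kwargs,  # mirrors the 'llm_config': self.llm_kwargs change in a later hunk
)

# position=0 puts the hook ahead of the default generate_oai_reply in the
# reply-function chain, so the built-in OpenAI client is never invoked.
assistant.register_reply(
    trigger=[ConversableAgent, None],
    reply_func=gpt_academic_generate_oai_reply,
    position=0,
)
```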
@@ -45,32 +55,6 @@ class AutoGenGeneral(PluginMultiprocessManager):
         else:
             raise TimeoutError("等待用户输入超时")

-    # def gpt_academic_generate_oai_reply(self, agent, messages, sender, config):
-    #     from .bridge_autogen import Completion
-    #     if messages is None:
-    #         messages = agent._oai_messages[sender]
-
-    #     def instantiate(
-    #         cls,
-    #         template: Union[str, None],
-    #         context: Optional[Dict] = None,
-    #         allow_format_str_template: Optional[bool] = False,
-    #     ):
-    #         if not context or template is None:
-    #             return template
-    #         if isinstance(template, str):
-    #             return template.format(**context) if allow_format_str_template else template
-    #         return template(context)
-
-    #     res = predict_no_ui_long_connection(
-    #         messages[-1].pop("context", None),
-    #         llm_kwargs=self.llm_kwargs,
-    #         history=messages,
-    #         sys_prompt=agent._oai_system_message,
-    #         observe_window=None,
-    #         console_slience=False)
-    #     return True, res
-
     def define_agents(self):
         raise NotImplementedError

@@ -85,7 +69,7 @@ class AutoGenGeneral(PluginMultiprocessManager):
         for agent_kwargs in agents:
             agent_cls = agent_kwargs.pop('cls')
             kwargs = {
-                'llm_config':{},
+                'llm_config':self.llm_kwargs,
                 'code_execution_config':code_execution_config
             }
             kwargs.update(agent_kwargs)
@@ -41,11 +41,11 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
         "azure-gpt-4",
         "azure-gpt-4-32k",
     ]
-    llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
     if llm_kwargs['llm_model'] not in supported_llms:
         chatbot.append([f"处理任务: {txt}", f"当前插件只支持{str(supported_llms)}, 当前模型{llm_kwargs['llm_model']}."])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
+    llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])

     # 检查当前的模型是否符合要求
     API_URL_REDIRECT = get_conf('API_URL_REDIRECT')
@@ -56,7 +56,9 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_

     # 尝试导入依赖,如果缺少依赖,则给出安装建议
     try:
-        import autogen, docker
+        import autogen
+        if get_conf("AUTOGEN_USE_DOCKER"):
+            import docker
     except:
         chatbot.append([ f"处理任务: {txt}",
                          f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pyautogen docker```。"])
@@ -67,7 +69,8 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
     try:
         import autogen
         import glob, os, time, subprocess
-        subprocess.Popen(['docker', '--version'])
+        if get_conf("AUTOGEN_USE_DOCKER"):
+            subprocess.Popen(["docker", "--version"])
     except:
         chatbot.append([f"处理任务: {txt}", f"缺少docker运行环境!"])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@@ -548,7 +548,7 @@ def LLM_CATCH_EXCEPTION(f):
     return decorated


-def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window=[], console_slience=False):
     """
     发送至LLM,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
     inputs:
@@ -15,6 +15,7 @@ Markdown
 pygments
 pymupdf
 openai
+pyautogen
 numpy
 arxiv
 rich