将autogen大模型调用底层hook掉

2023-11-11 22:01:19 +08:00 · 2023-11-11 22:01:19 +08:00 · 28119e343c
commit 28119e343c
parent 804599bbc3
8 changed files with 33 additions and 634 deletions
--- a/config.py
+++ b/config.py
@ -211,7 +211,7 @@ ALLOW_RESET_CONFIG = False


 # 在使用AutoGen插件时，是否使用Docker容器运行代码
-AUTOGEN_USE_DOCKER = True
+AUTOGEN_USE_DOCKER = False


 # 临时的上传文件夹位置，请勿修改
--- a/crazy_functional.py
+++ b/crazy_functional.py
@ -539,7 +539,6 @@ def get_crazy_functions():
    except:
        print('Load function plugin failed')

-    try:
    from crazy_functions.多智能体 import 多智能体终端
    function_plugins.update({
        "多智能体终端（微软AutoGen）": {
@ -549,8 +548,6 @@ def get_crazy_functions():
            "Function": HotReload(多智能体终端)
        }
    })
-    except:
-        print('Load function plugin failed')

    # try:
    #     from crazy_functions.chatglm微调工具 import 微调数据集生成
--- a/crazy_functions/agent_fns/auto_agent.py
+++ b/crazy_functions/agent_fns/auto_agent.py
@ -2,8 +2,6 @@ from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc,
 from toolbox import report_execption, get_log_folder, update_ui_lastest_msg, Singleton
 from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
 from crazy_functions.agent_fns.general import AutoGenGeneral
-import time
-from autogen import AssistantAgent, UserProxyAgent



--- a/crazy_functions/agent_fns/bridge_autogen.py
+++ b/crazy_functions/agent_fns/bridge_autogen.py
@ -1,584 +0,0 @@
-from time import sleep
-import logging
-import time
-from typing import List, Optional, Dict, Callable, Union
-import sys
-import shutil
-import numpy as np
-from flaml import tune, BlendSearch
-from flaml.tune.space import is_constant
-from flaml.automl.logger import logger_formatter
-from collections import defaultdict
-
-try:
-    import openai
-    from openai.error import (
-        ServiceUnavailableError,
-        RateLimitError,
-        APIError,
-        InvalidRequestError,
-        APIConnectionError,
-        Timeout,
-        AuthenticationError,
-    )
-    from openai import Completion as openai_Completion
-    import diskcache
-
-    ERROR = None
-except ImportError:
-    ERROR = ImportError("please install openai and diskcache to use the autogen.oai subpackage.")
-    openai_Completion = object
-logger = logging.getLogger(__name__)
-if not logger.handlers:
-    # Add the console handler.
-    _ch = logging.StreamHandler(stream=sys.stdout)
-    _ch.setFormatter(logger_formatter)
-    logger.addHandler(_ch)
-
-
-class Completion(openai_Completion):
-    """A class for OpenAI completion API.
-
-    It also supports: ChatCompletion, Azure OpenAI API.
-    """
-
-    # set of models that support chat completion
-    chat_models = {
-        "gpt-3.5-turbo",
-        "gpt-3.5-turbo-0301",  # deprecate in Sep
-        "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-16k",
-        "gpt-3.5-turbo-16k-0613",
-        "gpt-35-turbo",
-        "gpt-35-turbo-16k",
-        "gpt-4",
-        "gpt-4-32k",
-        "gpt-4-32k-0314",  # deprecate in Sep
-        "gpt-4-0314",  # deprecate in Sep
-        "gpt-4-0613",
-        "gpt-4-32k-0613",
-    }
-
-    # price per 1k tokens
-    price1K = {
-        "text-ada-001": 0.0004,
-        "text-babbage-001": 0.0005,
-        "text-curie-001": 0.002,
-        "code-cushman-001": 0.024,
-        "code-davinci-002": 0.1,
-        "text-davinci-002": 0.02,
-        "text-davinci-003": 0.02,
-        "gpt-3.5-turbo": (0.0015, 0.002),
-        "gpt-3.5-turbo-instruct": (0.0015, 0.002),
-        "gpt-3.5-turbo-0301": (0.0015, 0.002),  # deprecate in Sep
-        "gpt-3.5-turbo-0613": (0.0015, 0.002),
-        "gpt-3.5-turbo-16k": (0.003, 0.004),
-        "gpt-3.5-turbo-16k-0613": (0.003, 0.004),
-        "gpt-35-turbo": (0.0015, 0.002),
-        "gpt-35-turbo-16k": (0.003, 0.004),
-        "gpt-35-turbo-instruct": (0.0015, 0.002),
-        "gpt-4": (0.03, 0.06),
-        "gpt-4-32k": (0.06, 0.12),
-        "gpt-4-0314": (0.03, 0.06),  # deprecate in Sep
-        "gpt-4-32k-0314": (0.06, 0.12),  # deprecate in Sep
-        "gpt-4-0613": (0.03, 0.06),
-        "gpt-4-32k-0613": (0.06, 0.12),
-    }
-
-    default_search_space = {
-        "model": tune.choice(
-            [
-                "text-ada-001",
-                "text-babbage-001",
-                "text-davinci-003",
-                "gpt-3.5-turbo",
-                "gpt-4",
-            ]
-        ),
-        "temperature_or_top_p": tune.choice(
-            [
-                {"temperature": tune.uniform(0, 2)},
-                {"top_p": tune.uniform(0, 1)},
-            ]
-        ),
-        "max_tokens": tune.lograndint(50, 1000),
-        "n": tune.randint(1, 100),
-        "prompt": "{prompt}",
-    }
-
-    seed = 41
-    cache_path = f".cache/{seed}"
-    # retry after this many seconds
-    retry_wait_time = 10
-    # fail a request after hitting RateLimitError for this many seconds
-    max_retry_period = 120
-    # time out for request to openai server
-    request_timeout = 60
-
-    openai_completion_class = not ERROR and openai.Completion
-    _total_cost = 0
-    optimization_budget = None
-
-    _history_dict = _count_create = None
-
-    @classmethod
-    def set_cache(cls, seed: Optional[int] = 41, cache_path_root: Optional[str] = ".cache"):
-        """Set cache path.
-
-        Args:
-            seed (int, Optional): The integer identifier for the pseudo seed.
-                Results corresponding to different seeds will be cached in different places.
-            cache_path (str, Optional): The root path for the cache.
-                The complete cache path will be {cache_path}/{seed}.
-        """
-        cls.seed = seed
-        cls.cache_path = f"{cache_path_root}/{seed}"
-
-    @classmethod
-    def clear_cache(cls, seed: Optional[int] = None, cache_path_root: Optional[str] = ".cache"):
-        """Clear cache.
-
-        Args:
-            seed (int, Optional): The integer identifier for the pseudo seed.
-                If omitted, all caches under cache_path_root will be cleared.
-            cache_path (str, Optional): The root path for the cache.
-                The complete cache path will be {cache_path}/{seed}.
-        """
-        if seed is None:
-            shutil.rmtree(cache_path_root, ignore_errors=True)
-            return
-        with diskcache.Cache(f"{cache_path_root}/{seed}") as cache:
-            cache.clear()
-
-    @classmethod
-    def _book_keeping(cls, config: Dict, response):
-        """Book keeping for the created completions."""
-        if response != -1 and "cost" not in response:
-            response["cost"] = cls.cost(response)
-        if cls._history_dict is None:
-            return
-        if cls._history_compact:
-            value = {
-                "created_at": [],
-                "cost": [],
-                "token_count": [],
-            }
-            if "messages" in config:
-                messages = config["messages"]
-                if len(messages) > 1 and messages[-1]["role"] != "assistant":
-                    existing_key = get_key(messages[:-1])
-                    value = cls._history_dict.pop(existing_key, value)
-                key = get_key(messages + [choice["message"] for choice in response["choices"]])
-            else:
-                key = get_key([config["prompt"]] + [choice.get("text") for choice in response["choices"]])
-            value["created_at"].append(cls._count_create)
-            value["cost"].append(response["cost"])
-            value["token_count"].append(
-                {
-                    "model": response["model"],
-                    "prompt_tokens": response["usage"]["prompt_tokens"],
-                    "completion_tokens": response["usage"].get("completion_tokens", 0),
-                    "total_tokens": response["usage"]["total_tokens"],
-                }
-            )
-            cls._history_dict[key] = value
-            cls._count_create += 1
-            return
-        cls._history_dict[cls._count_create] = {
-            "request": config,
-            "response": response.to_dict_recursive(),
-        }
-        cls._count_create += 1
-
-    @classmethod
-    def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
-        """Get the response from the openai api call.
-
-        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
-        """
-        config = config.copy()
-        
-
-    @classmethod
-    def _get_max_valid_n(cls, key, max_tokens):
-        # find the max value in max_valid_n_per_max_tokens
-        # whose key is equal or larger than max_tokens
-        return max(
-            (value for k, value in cls._max_valid_n_per_max_tokens.get(key, {}).items() if k >= max_tokens),
-            default=1,
-        )
-
-    @classmethod
-    def _get_min_invalid_n(cls, key, max_tokens):
-        # find the min value in min_invalid_n_per_max_tokens
-        # whose key is equal or smaller than max_tokens
-        return min(
-            (value for k, value in cls._min_invalid_n_per_max_tokens.get(key, {}).items() if k <= max_tokens),
-            default=None,
-        )
-
-    @classmethod
-    def _get_region_key(cls, config):
-        # get a key for the valid/invalid region corresponding to the given config
-        config = cls._pop_subspace(config, always_copy=False)
-        return (
-            config["model"],
-            config.get("prompt", config.get("messages")),
-            config.get("stop"),
-        )
-
-    @classmethod
-    def _update_invalid_n(cls, prune, region_key, max_tokens, num_completions):
-        if prune:
-            # update invalid n and prune this config
-            cls._min_invalid_n_per_max_tokens[region_key] = invalid_n = cls._min_invalid_n_per_max_tokens.get(
-                region_key, {}
-            )
-            invalid_n[max_tokens] = min(num_completions, invalid_n.get(max_tokens, np.inf))
-
-    @classmethod
-    def _pop_subspace(cls, config, always_copy=True):
-        if "subspace" in config:
-            config = config.copy()
-            config.update(config.pop("subspace"))
-        return config.copy() if always_copy else config
-
-    @classmethod
-    def _get_params_for_create(cls, config: Dict) -> Dict:
-        """Get the params for the openai api call from a config in the search space."""
-        params = cls._pop_subspace(config)
-        if cls._prompts:
-            params["prompt"] = cls._prompts[config["prompt"]]
-        else:
-            params["messages"] = cls._messages[config["messages"]]
-        if "stop" in params:
-            params["stop"] = cls._stops and cls._stops[params["stop"]]
-        temperature_or_top_p = params.pop("temperature_or_top_p", None)
-        if temperature_or_top_p:
-            params.update(temperature_or_top_p)
-        if cls._config_list and "config_list" not in params:
-            params["config_list"] = cls._config_list
-        return params
-
-    @classmethod
-    def create(
-        cls,
-        context: Optional[Dict] = None,
-        use_cache: Optional[bool] = True,
-        config_list: Optional[List[Dict]] = None,
-        filter_func: Optional[Callable[[Dict, Dict, Dict], bool]] = None,
-        raise_on_ratelimit_or_timeout: Optional[bool] = True,
-        allow_format_str_template: Optional[bool] = False,
-        **config,
-    ):
-        """Make a completion for a given context.
-
-        Args:
-            context (Dict, Optional): The context to instantiate the prompt.
-                It needs to contain keys that are used by the prompt template or the filter function.
-                E.g., `prompt="Complete the following sentence: {prefix}, context={"prefix": "Today I feel"}`.
-                The actual prompt will be:
-                "Complete the following sentence: Today I feel".
-                More examples can be found at [templating](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#templating).
-            use_cache (bool, Optional): Whether to use cached responses.
-            config_list (List, Optional): List of configurations for the completion to try.
-                The first one that does not raise an error will be used.
-                Only the differences from the default config need to be provided.
-                E.g.,
-
-        ```python
-        response = oai.Completion.create(
-            config_list=[
-                {
-                    "model": "gpt-4",
-                    "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
-                    "api_type": "azure",
-                    "api_base": os.environ.get("AZURE_OPENAI_API_BASE"),
-                    "api_version": "2023-03-15-preview",
-                },
-                {
-                    "model": "gpt-3.5-turbo",
-                    "api_key": os.environ.get("OPENAI_API_KEY"),
-                    "api_type": "open_ai",
-                    "api_base": "https://api.openai.com/v1",
-                },
-                {
-                    "model": "llama-7B",
-                    "api_base": "http://127.0.0.1:8080",
-                    "api_type": "open_ai",
-                }
-            ],
-            prompt="Hi",
-        )
-        ```
-
-            filter_func (Callable, Optional): A function that takes in the context, the config and the response and returns a boolean to indicate whether the response is valid. E.g.,
-
-        ```python
-        def yes_or_no_filter(context, config, response):
-            return context.get("yes_or_no_choice", False) is False or any(
-                text in ["Yes.", "No."] for text in oai.Completion.extract_text(response)
-            )
-        ```
-
-            raise_on_ratelimit_or_timeout (bool, Optional): Whether to raise RateLimitError or Timeout when all configs fail.
-                When set to False, -1 will be returned when all configs fail.
-            allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
-            **config: Configuration for the openai API call. This is used as parameters for calling openai API.
-                The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
-                Besides the parameters for the openai API call, it can also contain:
-                - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
-                - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
-                - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.
-
-        Returns:
-            Responses from OpenAI API, with additional fields.
-                - `cost`: the total cost.
-            When `config_list` is provided, the response will contain a few more fields:
-                - `config_id`: the index of the config in the config_list that is used to generate the response.
-                - `pass_filter`: whether the response passes the filter function. None if no filter is provided.
-        """
-        if ERROR:
-            raise ERROR
-        config_list = [
-            {
-                "model": "llama-7B",
-                "api_base": "http://127.0.0.1:8080",
-                "api_type": "open_ai",
-            }
-        ]
-        last = len(config_list) - 1
-        cost = 0
-        for i, each_config in enumerate(config_list):
-            base_config = config.copy()
-            base_config["allow_format_str_template"] = allow_format_str_template
-            base_config.update(each_config)
-            if i < last and filter_func is None and "max_retry_period" not in base_config:
-                # max_retry_period = 0 to avoid retrying when no filter is given
-                base_config["max_retry_period"] = 0
-            try:
-                response = cls.create(
-                    context,
-                    use_cache,
-                    raise_on_ratelimit_or_timeout=i < last or raise_on_ratelimit_or_timeout,
-                    **base_config,
-                )
-                if response == -1:
-                    return response
-                pass_filter = filter_func is None or filter_func(
-                    context=context, base_config=config, response=response
-                )
-                if pass_filter or i == last:
-                    response["cost"] = cost + response["cost"]
-                    response["config_id"] = i
-                    response["pass_filter"] = pass_filter
-                    return response
-                cost += response["cost"]
-            except (AuthenticationError, RateLimitError, Timeout, InvalidRequestError):
-                logger.debug(f"failed with config {i}", exc_info=1)
-                if i == last:
-                    raise
-
-        params = cls._construct_params(context, config, allow_format_str_template=allow_format_str_template)
-        if not use_cache:
-            return cls._get_response(
-                params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout, use_cache=False
-            )
-        seed = cls.seed
-        if "seed" in params:
-            cls.set_cache(params.pop("seed"))
-        with diskcache.Cache(cls.cache_path) as cls._cache:
-            cls.set_cache(seed)
-            return cls._get_response(params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout)
-
-    @classmethod
-    def instantiate(
-        cls,
-        template: Union[str, None],
-        context: Optional[Dict] = None,
-        allow_format_str_template: Optional[bool] = False,
-    ):
-        if not context or template is None:
-            return template
-        if isinstance(template, str):
-            return template.format(**context) if allow_format_str_template else template
-        return template(context)
-
-    @classmethod
-    def _construct_params(cls, context, config, prompt=None, messages=None, allow_format_str_template=False):
-        params = config.copy()
-        model = config["model"]
-        prompt = config.get("prompt") if prompt is None else prompt
-        messages = config.get("messages") if messages is None else messages
-        # either "prompt" should be in config (for being compatible with non-chat models)
-        # or "messages" should be in config (for tuning chat models only)
-        if prompt is None and (model in cls.chat_models or issubclass(cls, ChatCompletion)):
-            if messages is None:
-                raise ValueError("Either prompt or messages should be in config for chat models.")
-        if prompt is None:
-            params["messages"] = (
-                [
-                    {
-                        **m,
-                        "content": cls.instantiate(m["content"], context, allow_format_str_template),
-                    }
-                    if m.get("content")
-                    else m
-                    for m in messages
-                ]
-                if context
-                else messages
-            )
-        elif model in cls.chat_models or issubclass(cls, ChatCompletion):
-            # convert prompt to messages
-            params["messages"] = [
-                {
-                    "role": "user",
-                    "content": cls.instantiate(prompt, context, allow_format_str_template),
-                },
-            ]
-            params.pop("prompt", None)
-        else:
-            params["prompt"] = cls.instantiate(prompt, context, allow_format_str_template)
-        return params
-
-    @classmethod
-    def extract_text(cls, response: dict) -> List[str]:
-        """Extract the text from a completion or chat response.
-
-        Args:
-            response (dict): The response from OpenAI API.
-
-        Returns:
-            A list of text in the responses.
-        """
-        choices = response["choices"]
-        if "text" in choices[0]:
-            return [choice["text"] for choice in choices]
-        return [choice["message"].get("content", "") for choice in choices]
-
-    @classmethod
-    def extract_text_or_function_call(cls, response: dict) -> List[str]:
-        """Extract the text or function calls from a completion or chat response.
-
-        Args:
-            response (dict): The response from OpenAI API.
-
-        Returns:
-            A list of text or function calls in the responses.
-        """
-        choices = response["choices"]
-        if "text" in choices[0]:
-            return [choice["text"] for choice in choices]
-        return [
-            choice["message"] if "function_call" in choice["message"] else choice["message"].get("content", "")
-            for choice in choices
-        ]
-
-    @classmethod
-    @property
-    def logged_history(cls) -> Dict:
-        """Return the book keeping dictionary."""
-        return cls._history_dict
-
-    @classmethod
-    def print_usage_summary(cls) -> Dict:
-        """Return the usage summary."""
-        if cls._history_dict is None:
-            print("No usage summary available.", flush=True)
-
-        token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
-
-        if not cls._history_compact:
-            source = cls._history_dict.values()
-            total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
-        else:
-            # source = cls._history_dict["token_count"]
-            # total_cost = sum(cls._history_dict['cost'])
-            total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
-            source = (
-                token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
-            )
-
-        for entry in source:
-            if not cls._history_compact:
-                model = entry["response"]["model"]
-                token_data = entry["response"]["usage"]
-            else:
-                model = entry["model"]
-                token_data = entry
-
-            token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
-            token_count_summary[model]["completion_tokens"] += token_data["completion_tokens"]
-            token_count_summary[model]["total_tokens"] += token_data["total_tokens"]
-
-        print(f"Total cost: {total_cost}", flush=True)
-        for model, counts in token_count_summary.items():
-            print(
-                f"Token count summary for model {model}: prompt_tokens: {counts['prompt_tokens']}, completion_tokens: {counts['completion_tokens']}, total_tokens: {counts['total_tokens']}",
-                flush=True,
-            )
-
-    @classmethod
-    def start_logging(
-        cls, history_dict: Optional[Dict] = None, compact: Optional[bool] = True, reset_counter: Optional[bool] = True
-    ):
-        """Start book keeping.
-
-        Args:
-            history_dict (Dict): A dictionary for book keeping.
-                If no provided, a new one will be created.
-            compact (bool): Whether to keep the history dictionary compact.
-                Compact history contains one key per conversation, and the value is a dictionary
-                like:
-        ```python
-        {
-            "create_at": [0, 1],
-            "cost": [0.1, 0.2],
-        }
-        ```
-                where "created_at" is the index of API calls indicating the order of all the calls,
-                and "cost" is the cost of each call. This example shows that the conversation is based
-                on two API calls. The compact format is useful for condensing the history of a conversation.
-                If compact is False, the history dictionary will contain all the API calls: the key
-                is the index of the API call, and the value is a dictionary like:
-        ```python
-        {
-            "request": request_dict,
-            "response": response_dict,
-        }
-        ```
-                where request_dict is the request sent to OpenAI API, and response_dict is the response.
-                For a conversation containing two API calls, the non-compact history dictionary will be like:
-        ```python
-        {
-            0: {
-                "request": request_dict_0,
-                "response": response_dict_0,
-            },
-            1: {
-                "request": request_dict_1,
-                "response": response_dict_1,
-            },
-        ```
-                The first request's messages plus the response is equal to the second request's messages.
-                For a conversation with many turns, the non-compact history dictionary has a quadratic size
-                while the compact history dict has a linear size.
-            reset_counter (bool): whether to reset the counter of the number of API calls.
-        """
-        cls._history_dict = {} if history_dict is None else history_dict
-        cls._history_compact = compact
-        cls._count_create = 0 if reset_counter or cls._count_create is None else cls._count_create
-
-    @classmethod
-    def stop_logging(cls):
-        """End book keeping."""
-        cls._history_dict = cls._count_create = None
-
-
-class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""
-
-    default_search_space = Completion.default_search_space.copy()
-    default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
-    openai_completion_class = not ERROR and openai.ChatCompletion
--- a/crazy_functions/agent_fns/general.py
+++ b/crazy_functions/agent_fns/general.py
@ -9,17 +9,27 @@ def gpt_academic_generate_oai_reply(
    sender,
    config,
 ):
-    from .bridge_autogen import Completion
    llm_config = self.llm_config if config is None else config
    if llm_config is False:
        return False, None
    if messages is None:
        messages = self._oai_messages[sender]

-    response = Completion.create(
-        context=messages[-1].pop("context", None), messages=self._oai_system_message + messages, **llm_config
+    inputs = messages[-1]['content']
+    history = []
+    for message in messages[:-1]:
+        history.append(message['content'])
+    context=messages[-1].pop("context", None)
+    assert context is None, "预留参数 context 未实现"
+
+    reply = predict_no_ui_long_connection(
+        inputs=inputs,
+        llm_kwargs=llm_config,
+        history=history,
+        sys_prompt=self._oai_system_message[0]['content'],
+        console_slience=True
    )
-    return True, Completion.extract_text_or_function_call(response)[0]
+    return True, reply

 class AutoGenGeneral(PluginMultiprocessManager):
    def gpt_academic_print_override(self, user_proxy, message, sender):
@ -45,32 +55,6 @@ class AutoGenGeneral(PluginMultiprocessManager):
        else:
            raise TimeoutError("等待用户输入超时")

-    # def gpt_academic_generate_oai_reply(self, agent, messages, sender, config):
-    #     from .bridge_autogen import Completion
-    #     if messages is None:
-    #         messages = agent._oai_messages[sender]
-
-    #     def instantiate(
-    #         cls,
-    #         template: Union[str, None],
-    #         context: Optional[Dict] = None,
-    #         allow_format_str_template: Optional[bool] = False,
-    #     ):
-    #         if not context or template is None:
-    #             return template
-    #         if isinstance(template, str):
-    #             return template.format(**context) if allow_format_str_template else template
-    #         return template(context)
-
-    #     res = predict_no_ui_long_connection(
-    #         messages[-1].pop("context", None), 
-    #         llm_kwargs=self.llm_kwargs, 
-    #         history=messages, 
-    #         sys_prompt=agent._oai_system_message,
-    #         observe_window=None, 
-    #         console_slience=False)
-    #     return True, res
-
    def define_agents(self):
        raise NotImplementedError

@ -85,7 +69,7 @@ class AutoGenGeneral(PluginMultiprocessManager):
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop('cls')
                kwargs = {
-                    'llm_config':{},
+                    'llm_config':self.llm_kwargs,
                    'code_execution_config':code_execution_config
                }
                kwargs.update(agent_kwargs)
--- a/crazy_functions/多智能体.py
+++ b/crazy_functions/多智能体.py
@ -41,11 +41,11 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
        "azure-gpt-4",
        "azure-gpt-4-32k",
    ]
-    llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
    if llm_kwargs['llm_model'] not in supported_llms:
        chatbot.append([f"处理任务: {txt}", f"当前插件只支持{str(supported_llms)}, 当前模型{llm_kwargs['llm_model']}."])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
+    llm_kwargs['api_key'] = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
    
    # 检查当前的模型是否符合要求
    API_URL_REDIRECT = get_conf('API_URL_REDIRECT')
@ -56,7 +56,9 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
    
    # 尝试导入依赖，如果缺少依赖，则给出安装建议
    try:
-        import autogen, docker
+        import autogen
+        if get_conf("AUTOGEN_USE_DOCKER"):
+            import docker
    except:
        chatbot.append([ f"处理任务: {txt}", 
            f"导入软件依赖失败。使用该模块需要额外依赖，安装方法```pip install --upgrade pyautogen docker```。"])
@ -67,7 +69,8 @@ def 多智能体终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_
    try:
        import autogen
        import glob, os, time, subprocess
-        subprocess.Popen(['docker', '--version'])
+        if get_conf("AUTOGEN_USE_DOCKER"):
+            subprocess.Popen(["docker", "--version"])
    except:
        chatbot.append([f"处理任务: {txt}", f"缺少docker运行环境！"])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
--- a/request_llms/bridge_all.py
+++ b/request_llms/bridge_all.py
@ -548,7 +548,7 @@ def LLM_CATCH_EXCEPTION(f):
    return decorated


-def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
+def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window=[], console_slience=False):
    """
    发送至LLM，等待回复，一次性完成，不显示中间过程。但内部用stream的方法避免中途网线被掐。
    inputs：
--- a/requirements.txt
+++ b/requirements.txt
@ -15,6 +15,7 @@ Markdown
 pygments
 pymupdf
 openai
+pyautogen
 numpy
 arxiv
 rich