The agent class

`Agent`

Agent handles the behaviour of the model and how it interacts with the environment.

Source code in sweagent/agent/agents.py

class Agent:
    """Agent handles the behaviour of the model and how it interacts with the environment."""

    def __init__(self, name: str, args: AgentArguments):
        self.name = name
        self.model = get_model(args.model, args.config._commands + args.config.subroutine_types)
        self.config = args.config
        assert self.config is not None  # mypy
        self.system_args = {
            "command_docs": self.config.command_docs,
            **self.config.env_variables,
        }
        self.instance_args = None
        self._parse_command_patterns()
        self.history = []
        self.last_container_id = None
        self.hooks = []
        self.logger = get_logger("agent")

    def add_hook(self, hook: AgentHook):
        """Add hook to agent"""
        hook.on_init()
        self.hooks.append(hook)

    def _append_history(self, item: dict):
        for hook in self.hooks:
            hook.on_query_message_added(**item)
        self.history.append(item)

    def setup(self, instance_args, init_model_stats=None) -> None:
        """Setup the agent for a new instance. This includes
        formatting the system message and adding demonstrations to the history.

        Args:
            instance_args: Arguments for the instance
        """
        assert self.config is not None  # mypy
        self.model.reset_stats(init_model_stats)
        self.instance_args = instance_args

        system_msg = self.config.system_template.format(**self.system_args)
        self.logger.info(f"SYSTEM ({self.name})\n{system_msg}")

        self.history: list[dict[str, Any]] = []
        self._append_history({"role": "system", "content": system_msg, "agent": self.name})

        if "history_to_messages" in dir(self.model):
            for demonstration_path in self.config.demonstrations:
                if self.config.demonstration_template is None and not self.config.put_demos_in_history:
                    msg = "Cannot use demonstrations without a demonstration template or put_demos_in_history=True"
                    raise ValueError(msg)

                # Load history
                self.logger.info(f"DEMONSTRATION: {demonstration_path}")
                demo_history = json.loads(Path(demonstration_path).read_text())["history"]
                demo_history = [
                    entry
                    for entry in demo_history
                    if ("agent" not in entry) or ("agent" in entry and entry["agent"] == self.name)
                ]

                if self.config.put_demos_in_history:
                    if self.config.demonstration_template is not None:
                        self.logger.warning("Demonstration template is ignored for put_demos_in_history=True")
                    # Add demonstration to history directly as separate messages
                    for entry in demo_history:
                        if entry["role"] != "system":
                            entry["is_demo"] = True
                            self._append_history(entry)
                else:
                    # Add demonstration as single message to history
                    demo_message = self.model.history_to_messages(
                        demo_history,
                        is_demonstration=True,
                    )
                    demonstration = self.config.demonstration_template.format(demonstration=demo_message)
                    self._append_history(
                        {
                            "agent": self.name,
                            "content": demonstration,
                            "is_demo": True,
                            "role": "user",
                        },
                    )

    @property
    def state_command(self) -> str:
        """Return the bash command that will be used to extract the environment state."""
        return self.config.state_command.name

    @property
    def local_history(self) -> list[dict[str, str]]:
        """Return the history of the agent since the last reset."""
        return self.config.history_processor([entry for entry in self.history if entry["agent"] == self.name])

    def save_trajectory(
        self, trajectory: list[dict[str, Any]], log_path: Path, env_name: str, info: dict[str, Any]
    ) -> None:
        """Save the trajectory"""
        log_dict = {
            "environment": env_name,
            "trajectory": trajectory,
            "history": self.history,
            "info": info,
        }
        log_path.write_text(json.dumps(log_dict, indent=2))

    def _get_first_match(self, action: str, pattern_type: str) -> re.Match | None:
        """Return the first match of a command pattern in the action string."""
        assert self.config is not None  # mypy
        if pattern_type == "subroutine":
            patterns = {k: v for k, v in self.subroutine_patterns.items()}
        elif pattern_type == "multi_line":
            patterns = {
                k: v
                for k, v in self.command_patterns.items()
                if k in self.config.multi_line_command_endings or k == self.config.submit_command
            }
            patterns += {
                k: v for k, v in self.subroutine_patterns.items() if k in self.config.multi_line_command_endings
            }
        elif pattern_type == "multi_line_no_subroutines":
            patterns = {k: v for k, v in self.command_patterns.items() if k in self.config.multi_line_command_endings}
        else:
            msg = f"Unknown pattern type: {pattern_type}"
            raise ValueError(msg)
        matches = list()
        for _, pat in patterns.items():
            match = pat.search(action)
            if match:
                matches.append(match)
        if len(matches) == 0:
            return None
        matches = sorted(matches, key=lambda x: x.start())
        return matches[0]

    def _guard_multiline_input(self, action: str) -> str:
        """Split action by multiline commands, then append the first line in each multiline command with "<< '{end_name}'".
        Multiline commands (which are specified by an end_name) are commands that span multiple lines and are terminated by a specific end_name.

        Their multi-line argument is sent using a heredoc, which is a way to send a multi-line string to a command in bash.
        """
        parsed_action = list()
        rem_action = action
        while rem_action.strip():
            first_match = self._get_first_match(rem_action, "multi_line_no_subroutines")
            if first_match:
                pre_action = rem_action[: first_match.start()]
                match_action = rem_action[first_match.start() : first_match.end()]
                rem_action = rem_action[first_match.end() :]
                if pre_action.strip():
                    parsed_action.append(pre_action)
                if match_action.strip():
                    eof = first_match.group(3).strip()
                    if not match_action.split("\n")[0].strip().endswith(f"<< '{eof}'"):
                        guarded_command = match_action[first_match.start() :]
                        first_line = guarded_command.split("\n")[0]
                        guarded_command = guarded_command.replace(first_line, first_line + f" << '{eof}'", 1)
                        parsed_action.append(guarded_command)
                    else:
                        parsed_action.append(match_action)
            else:
                parsed_action.append(rem_action)
                rem_action = ""
        return "\n".join(parsed_action)

    def split_actions(self, action: str, pattern_type="subroutine") -> list[dict[str, Any]]:
        """Split an action into a list of actions in a greedy manner, each of which is a subroutine call or a single command."""
        parsed_action = list()
        rem_action = action
        while rem_action.strip():
            first_match = self._get_first_match(rem_action, pattern_type)
            if first_match:
                pre_action = rem_action[: first_match.start()]
                match_action = rem_action[first_match.start() : first_match.end()]
                rem_action = rem_action[first_match.end() :]
                if pre_action.strip():
                    parsed_action.append({"agent": self.name, "action": pre_action, "cmd_name": None})
                if match_action.strip():
                    if match_action.split()[0] == self.config.submit_command:
                        parsed_action.append(
                            {
                                "agent": self.name,
                                "action": match_action,
                                "cmd_name": first_match.group(1),
                            },
                        )  # submit command is not a subroutine
                    else:
                        parsed_action.append(
                            {
                                "agent": first_match.group(1),
                                "args": first_match.group(2),
                                "action": match_action,
                                "cmd_name": first_match.group(1),
                            },
                        )
            else:
                parsed_action.append({"agent": self.name, "action": rem_action, "cmd_name": None})
                rem_action = ""
        return parsed_action

    def _parse_command_patterns(self) -> None:
        assert self.config is not None  # mypy
        self.command_patterns = dict()
        for command in self.config._commands:
            if command.end_name is not None:
                pat = re.compile(
                    rf"^\s*({command.name})\s*(.*?)^({command.end_name})\s*$",
                    re.DOTALL | re.MULTILINE,
                )
                self.command_patterns[command.name] = pat
            else:
                pat = re.compile(rf"^\s*({command.name})\s*(.*?)$", re.MULTILINE)
                self.command_patterns[command.name] = pat
        self.subroutine_patterns = dict()
        for _, subroutine in self.config._subroutines.items():
            if subroutine.end_name is None:
                pat = re.compile(rf"^\s*({subroutine.name})\s*(.*?)$", re.MULTILINE)
                self.subroutine_patterns[subroutine.name,] = pat
            else:
                pat = re.compile(
                    rf"^\s*({subroutine.name})\s*(.*?)^({subroutine.end_name})\s*$",
                    re.DOTALL | re.MULTILINE,
                )
                self.subroutine_patterns[subroutine.name] = pat
        if hasattr(self.config, "submit_command_end_name"):
            submit_pat = re.compile(
                rf"^\s*({self.config.submit_command})\s*(.*?)^({self.config.submit_command_end_name})\s*$",
                re.DOTALL | re.MULTILINE,
            )
        else:
            submit_pat = re.compile(rf"^\s*({self.config.submit_command})(\s*)$", re.MULTILINE)  # group 2 is nothing
        self.subroutine_patterns[self.config.submit_command] = submit_pat
        self.command_patterns[self.config.submit_command] = submit_pat

    def forward(self, observation: str, available_actions: list[str], state: str) -> tuple[str, str, str]:
        """Forwards the model

        Args:
            observation: Observation
            available_actions: Currently not used
            state:

        Returns:
            thought: model reasoning
            action: action that the model proposes
            output: raw model output
        """
        thought, action, output = self.forward_with_error_check(observation, state)

        self._append_history(
            {
                "role": "assistant",
                "content": output,
                "thought": thought,
                "action": action,
                "agent": self.name,
            },
        )

        self.logger.info(f"💭 THOUGHT ({self.name})\n{thought}")
        self.logger.info(f"🎬 ACTION ({self.name})\n{action}")

        return thought, action, output

    def forward_model(self, observation: str, state: str) -> str:
        """Query the model with the current state and observation with the appropriate template.

        Returns:
            output: raw model output
        """
        assert self.config is not None  # mypy

        state_vars = json.loads(state)

        templates: list[str] = []
        # Determine observation template based on what prior observation was
        if self.history[-1]["role"] == "system" or self.history[-1].get("is_demo", False):
            # Show instance template if prev. obs. was initial system message
            templates = [self.config.instance_template]
            if self.config.strategy_template is not None:
                templates.append(self.config.strategy_template)
        elif observation is None or observation.strip() == "":
            # Show no output template if observation content was empty
            templates = [self.config.next_step_no_output_template]
        else:
            # Show standard output template if there is observation content
            templates = [self.config.next_step_template]

        # Populate selected template(s) with information (e.g., issue, arguments, state)
        messages = []
        for template in templates:
            messages.append(
                template.format(
                    **self.instance_args,
                    **self.system_args,
                    **state_vars,
                    observation=(observation if observation is not None else ""),
                ),
            )

        message = "\n".join(messages)

        self.logger.info(f"🤖 MODEL INPUT\n{message}")
        self._append_history({"role": "user", "content": message, "agent": self.name})

        for hook in self.hooks:
            hook.on_model_query(query=self.local_history, agent=self.name)
        return self.model.query(self.local_history)

    def retry_after_format_fail(self, output: str) -> str:
        """Ask the model to correct (without committing to persistent history) after a malformatted model output"""
        format_error_template = self.config.format_error_template

        self.logger.warning(f"MALFORMED OUTPUT\n{output}")
        self.logger.warning(f"FORMAT ERROR\n{format_error_template}")

        temp_history = self.local_history + [
            {"role": "assistant", "content": output, "agent": self.name},
            {"role": "user", "content": format_error_template, "agent": self.name},
        ]
        return self.model.query(temp_history)

    def retry_after_blocklist_fail(self, output: str, action: str) -> str:
        """Ask the model to correct (without committing to persistent history) after a disallowed command"""
        name = action.strip().split()[0]
        blocklist_error_message = self.config.blocklist_error_template.format(name=name)

        self.logger.warning(f"BLOCKLISTED OUTPUT\n{output}")
        self.logger.warning(f"BLOCKLIST ERROR\n{blocklist_error_message}")

        temp_history = self.local_history + [
            {"role": "assistant", "content": output, "agent": self.name},
            {"role": "user", "content": blocklist_error_message, "agent": self.name},
        ]
        return self.model.query(temp_history)

    def should_block_action(self, action: str) -> bool:
        """Check if the command should be blocked."""
        names = action.strip().split()
        if len(names) == 0:
            return False
        name = names[0]
        if name in self.config.blocklist:
            return True
        if name in self.config.blocklist_standalone and name == action.strip():
            return True
        return False

    def check_format_and_requery(
        self,
        output: str,
    ) -> tuple[str, str, str]:
        """Query the model with the current state and observation with the appropriate template.

        Try to parse the output into a thought and action. Retry if the output is malformatted or the action is blocked.

        Returns:
            thought: model reasoning
            action: action that the model proposes
            output: raw model output
        """
        # Condition for handling outputs with no thought (just action)
        if self.model.args.model_name == "human":
            return "", output, output
        elif self.model.args.model_name == "human_thought":
            thought, action = ParseFunction.get("ThoughtActionParser")(
                output,
                self.config._commands + self.config.subroutine_types,
                strict=False,
            )
            return thought, action, output

        format_fails = blocklist_fails = 0

        while format_fails + blocklist_fails <= 2:
            try:
                thought, action = self.config.parse_function(
                    output,
                    self.config._commands + self.config.subroutine_types,
                    strict=False,
                )
            except KeyboardInterrupt:
                raise
            except FormatError:
                format_fails += 1
                output = self.retry_after_format_fail(output)
                continue
            if self.should_block_action(action):
                blocklist_fails += 1
                output = self.retry_after_blocklist_fail(output, action)
            else:
                return thought, action, output
        self.logger.warning(f"Malformat limit reached: \n{output}")
        return "Exit due to format error", "exit_format", output

    def forward_with_error_check(self, observation: str, state: str) -> tuple[str, str, str]:
        """Wrapper around `self.forward_model` that handles errors and retries
        due to format errors or blocked actions.

        Returns:
            thought: model reasoning
            action: action that the model proposes
            output: raw model output
        """
        try:
            output = self.forward_model(observation, state)
        except KeyboardInterrupt:
            raise
        except RuntimeError as e:
            self.logger.warning(f"Runtime error: {e}")
            return (
                f"Exit due to runtime error: {e}",
                "exit_error",
                f"exit due to runtime error: {e}",
            )
        except ContextWindowExceededError:
            self.logger.warning("Context window exceeded")
            return "Exit due to context window", "exit_context", "Exit due to context window"
        except CostLimitExceededError:
            self.logger.warning("Cost limit exceeded")
            return "Exit due to cost limit", "exit_cost", "Exit due to cost limit"
        except RetryError as e:
            self.logger.warning(f"Retry error: {e}")
            return (
                f"Exit due to retry error: {e}",
                "exit_api",
                f"exit due to retry error: {e}",
            )
        return self.check_format_and_requery(output)

    def init_environment_vars(self, env: SWEEnv):
        self.set_environment_vars(env, self.config.env_variables)

    def set_environment_vars(self, env: SWEEnv, env_variables: dict[str, Any]) -> None:
        assert self.config is not None  # mypy
        commands_to_execute = (
            [self.config.state_command.code]
            +
            # [code for code in self.config.util_functions] +
            # [command.code for command in self.config._commands] +
            [f"{k}={v}" for k, v in env_variables.items()]
        )
        commands = "\n".join(commands_to_execute)
        try:
            output = env.communicate(commands)
            if env.returncode != 0:
                msg = f"Nonzero return code: {env.returncode}\nOutput: {output}"
                raise RuntimeError(msg)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            self.logger.warning("Failed to set environment variables")
            raise e
        command_files = list()
        for file in self.config.command_files:
            datum = dict()
            with open(file) as f:
                contents = f.read()
            datum["contents"] = contents
            filename = Path(file).name
            if not contents.strip().startswith("#!"):
                if filename.endswith(".sh"):
                    # files are sourced, so they are not executable
                    datum["name"] = Path(file).name
                    datum["type"] = "source_file"
                elif filename.startswith("_"):
                    # files are sourced, so they are not executable
                    datum["name"] = Path(file).name
                    datum["type"] = "utility"
                else:
                    msg = (
                        f"Non-shell script file {file} does not start with shebang.\n"
                        "Either add a shebang (#!) or change the file extension to .sh if you want to source it.\n"
                        "You can override this behavior by adding an underscore to the file name (e.g. _utils.py)."
                    )
                    raise ValueError(msg)
            else:
                # scripts are made executable
                datum["name"] = Path(file).name.rsplit(".", 1)[0]
                datum["type"] = "script"
            command_files.append(datum)
        env.add_commands(command_files)

    def get_environment_vars(self, env: SWEEnv) -> dict[str, Any]:
        """Get environment variables"""
        assert self.config is not None  # mypy
        env_vars = dict()
        for var in self.config.env_variables:
            env_vars[var] = env.communicate(f"echo ${var}").strip()
        return env_vars

    def call_subroutine(self, agent_name: str, sub_action, env: SWEEnv):
        """Call subroutine"""
        assert self.config is not None  # mypy
        env_vars = self.get_environment_vars(env)
        cwd = env.communicate("pwd -P").strip()
        init_observation = self.config._subroutines[agent_name].init_observation
        if init_observation is not None:
            obs, _, _, _ = env.step(init_observation.format(args=sub_action["args"]))
        else:
            obs = None
        if env.returncode != 0:
            self._append_history({"role": "user", "content": obs, "agent": agent_name})
            msg = f"Nonzero return code: {env.returncode} for init_observation in {agent_name}.\n{obs}"
            raise RuntimeError(msg)
        return_type = self.config._subroutines[agent_name].return_type
        sub_agent = Agent(agent_name, self.config._subroutines[agent_name].agent_args)
        sub_agent_output = sub_agent.run(
            {"issue": sub_action["args"]},
            env,
            observation=obs,
            return_type=return_type,
            init_model_stats=self.model.stats,
        )
        self.history += sub_agent.history
        self.set_environment_vars(env, env_vars)
        env.communicate(f"cd {cwd}")
        self.model.stats.replace(sub_agent.model.stats)
        return sub_agent_output

    def run(
        self,
        setup_args: dict[str, Any],
        env: SWEEnv,
        observation: str | None = None,
        traj_dir: Path | None = None,
        return_type: str | None = "info_trajectory",
        init_model_stats: APIStats | None = None,
    ):
        """
        Run the agent on an environment.
        Return the final value of the specified return type.

        Args:
            setup_args: Arguments to pass to the agent's setup method.
            env: The environment to run the agent on.
            observation: Output from environment setup
            traj_dir: Directory to save the trajectory to
            return_type: Controls what to return.
                This should be left at `info_trajectory`, the
                other values are for internal usage with subroutines.
            init_model_stats: Initial model stats to use for the run.

        Returns:
            If return_type is "info_trajectory", returns a tuple of
            the info dictionary and the trajectory (list of dictionaries).
        """
        done = False
        # mypy checks
        assert env.container_obj is not None
        assert env.record is not None
        assert self.config is not None

        if env.container_obj.id != self.last_container_id:
            self.logger.info(f"Initializing agent settings for container {env.container_obj.id}")
            self.init_environment_vars(env)
            self.last_container_id = env.container_obj.id
        # Re-initialize primary
        self.setup(setup_args, init_model_stats)

        for hook in self.hooks:
            hook.on_run_start()

        # Run action/observation loop
        trajectory = []
        info = {}
        traj_log_path = traj_dir / (env.record["instance_id"] + ".traj")
        self.logger.info("Trajectory will be saved to %s", traj_log_path)
        while not done:
            for hook in self.hooks:
                hook.on_step_start()
            state = env.communicate(self.state_command) if self.state_command else None
            thought, action, output = self.forward(observation, env.get_available_actions(), state)
            for hook in self.hooks:
                hook.on_actions_generated(thought=thought, action=action, output=output)
            observations = list()
            run_action = self._guard_multiline_input(action)
            for sub_action in self.split_actions(run_action):
                if sub_action["agent"] == self.name or sub_action["cmd_name"] == self.config.submit_command:
                    for hook in self.hooks:
                        hook.on_sub_action_started(sub_action=sub_action)
                    obs, _, done, info = env.step(sub_action["action"])
                    for hook in self.hooks:
                        hook.on_sub_action_executed(obs=obs, done=done)
                    observations.append(obs)
                    if sub_action["cmd_name"] == self.config.submit_command:
                        done = True
                    if done:
                        break
                else:
                    agent_name = sub_action["agent"]
                    sub_agent_output = self.call_subroutine(agent_name, sub_action, env)
                    observations.append(sub_agent_output)

            observation = "\n".join([obs for obs in observations if obs is not None])

            trajectory_step = TrajectoryStep(
                {
                    "action": action,
                    "observation": observation,
                    "response": output,
                    "state": state,
                    "thought": thought,
                },
            )
            trajectory.append(trajectory_step)
            model_stats: APIStats = self.model.stats
            info["model_stats"] = model_stats.to_dict()
            if traj_dir:
                self.save_trajectory(trajectory, traj_log_path, env_name=env.name, info=info)
            for hook in self.hooks:
                hook.on_step_done(trajectory_step=trajectory_step, model_stats=model_stats)

        for hook in self.hooks:
            hook.on_run_done()

        self.logger.info("Trajectory saved to %s", traj_log_path)

        if return_type == "info":
            return info
        if return_type == "info_trajectory":
            return info, trajectory
        return trajectory[-1][return_type]

`local_history: list[dict[str, str]]` `property`

Return the history of the agent since the last reset.

`state_command: str` `property`

Return the bash command that will be used to extract the environment state.

`add_hook(hook)`

Add hook to agent

Source code in sweagent/agent/agents.py

def add_hook(self, hook: AgentHook):
    """Add hook to agent"""
    hook.on_init()
    self.hooks.append(hook)

`call_subroutine(agent_name, sub_action, env)`

Call subroutine

Source code in sweagent/agent/agents.py

def call_subroutine(self, agent_name: str, sub_action, env: SWEEnv):
    """Call subroutine"""
    assert self.config is not None  # mypy
    env_vars = self.get_environment_vars(env)
    cwd = env.communicate("pwd -P").strip()
    init_observation = self.config._subroutines[agent_name].init_observation
    if init_observation is not None:
        obs, _, _, _ = env.step(init_observation.format(args=sub_action["args"]))
    else:
        obs = None
    if env.returncode != 0:
        self._append_history({"role": "user", "content": obs, "agent": agent_name})
        msg = f"Nonzero return code: {env.returncode} for init_observation in {agent_name}.\n{obs}"
        raise RuntimeError(msg)
    return_type = self.config._subroutines[agent_name].return_type
    sub_agent = Agent(agent_name, self.config._subroutines[agent_name].agent_args)
    sub_agent_output = sub_agent.run(
        {"issue": sub_action["args"]},
        env,
        observation=obs,
        return_type=return_type,
        init_model_stats=self.model.stats,
    )
    self.history += sub_agent.history
    self.set_environment_vars(env, env_vars)
    env.communicate(f"cd {cwd}")
    self.model.stats.replace(sub_agent.model.stats)
    return sub_agent_output

`check_format_and_requery(output)`

Query the model with the current state and observation with the appropriate template.

Try to parse the output into a thought and action. Retry if the output is malformatted or the action is blocked.

Returns:

Name	Type	Description
`thought`	`str`	model reasoning
`action`	`str`	action that the model proposes
`output`	`str`	raw model output

Source code in sweagent/agent/agents.py

def check_format_and_requery(
    self,
    output: str,
) -> tuple[str, str, str]:
    """Query the model with the current state and observation with the appropriate template.

    Try to parse the output into a thought and action. Retry if the output is malformatted or the action is blocked.

    Returns:
        thought: model reasoning
        action: action that the model proposes
        output: raw model output
    """
    # Condition for handling outputs with no thought (just action)
    if self.model.args.model_name == "human":
        return "", output, output
    elif self.model.args.model_name == "human_thought":
        thought, action = ParseFunction.get("ThoughtActionParser")(
            output,
            self.config._commands + self.config.subroutine_types,
            strict=False,
        )
        return thought, action, output

    format_fails = blocklist_fails = 0

    while format_fails + blocklist_fails <= 2:
        try:
            thought, action = self.config.parse_function(
                output,
                self.config._commands + self.config.subroutine_types,
                strict=False,
            )
        except KeyboardInterrupt:
            raise
        except FormatError:
            format_fails += 1
            output = self.retry_after_format_fail(output)
            continue
        if self.should_block_action(action):
            blocklist_fails += 1
            output = self.retry_after_blocklist_fail(output, action)
        else:
            return thought, action, output
    self.logger.warning(f"Malformat limit reached: \n{output}")
    return "Exit due to format error", "exit_format", output

`forward(observation, available_actions, state)`

Forwards the model

Parameters:

Name	Type	Description	Default
`observation`	`str`	Observation	required
`available_actions`	`list[str]`	Currently not used	required
`state`	`str`		required

Returns:

Name	Type	Description
`thought`	`str`	model reasoning
`action`	`str`	action that the model proposes
`output`	`str`	raw model output

Source code in sweagent/agent/agents.py

def forward(self, observation: str, available_actions: list[str], state: str) -> tuple[str, str, str]:
    """Forwards the model

    Args:
        observation: Observation
        available_actions: Currently not used
        state:

    Returns:
        thought: model reasoning
        action: action that the model proposes
        output: raw model output
    """
    thought, action, output = self.forward_with_error_check(observation, state)

    self._append_history(
        {
            "role": "assistant",
            "content": output,
            "thought": thought,
            "action": action,
            "agent": self.name,
        },
    )

    self.logger.info(f"💭 THOUGHT ({self.name})\n{thought}")
    self.logger.info(f"🎬 ACTION ({self.name})\n{action}")

    return thought, action, output

`forward_model(observation, state)`

Query the model with the current state and observation with the appropriate template.

Returns:

Name	Type	Description
`output`	`str`	raw model output

Source code in sweagent/agent/agents.py

def forward_model(self, observation: str, state: str) -> str:
    """Query the model with the current state and observation with the appropriate template.

    Returns:
        output: raw model output
    """
    assert self.config is not None  # mypy

    state_vars = json.loads(state)

    templates: list[str] = []
    # Determine observation template based on what prior observation was
    if self.history[-1]["role"] == "system" or self.history[-1].get("is_demo", False):
        # Show instance template if prev. obs. was initial system message
        templates = [self.config.instance_template]
        if self.config.strategy_template is not None:
            templates.append(self.config.strategy_template)
    elif observation is None or observation.strip() == "":
        # Show no output template if observation content was empty
        templates = [self.config.next_step_no_output_template]
    else:
        # Show standard output template if there is observation content
        templates = [self.config.next_step_template]

    # Populate selected template(s) with information (e.g., issue, arguments, state)
    messages = []
    for template in templates:
        messages.append(
            template.format(
                **self.instance_args,
                **self.system_args,
                **state_vars,
                observation=(observation if observation is not None else ""),
            ),
        )

    message = "\n".join(messages)

    self.logger.info(f"🤖 MODEL INPUT\n{message}")
    self._append_history({"role": "user", "content": message, "agent": self.name})

    for hook in self.hooks:
        hook.on_model_query(query=self.local_history, agent=self.name)
    return self.model.query(self.local_history)

`forward_with_error_check(observation, state)`

Wrapper around self.forward_model that handles errors and retries due to format errors or blocked actions.

Returns:

Name	Type	Description
`thought`	`str`	model reasoning
`action`	`str`	action that the model proposes
`output`	`str`	raw model output

Source code in sweagent/agent/agents.py

def forward_with_error_check(self, observation: str, state: str) -> tuple[str, str, str]:
    """Wrapper around `self.forward_model` that handles errors and retries
    due to format errors or blocked actions.

    Returns:
        thought: model reasoning
        action: action that the model proposes
        output: raw model output
    """
    try:
        output = self.forward_model(observation, state)
    except KeyboardInterrupt:
        raise
    except RuntimeError as e:
        self.logger.warning(f"Runtime error: {e}")
        return (
            f"Exit due to runtime error: {e}",
            "exit_error",
            f"exit due to runtime error: {e}",
        )
    except ContextWindowExceededError:
        self.logger.warning("Context window exceeded")
        return "Exit due to context window", "exit_context", "Exit due to context window"
    except CostLimitExceededError:
        self.logger.warning("Cost limit exceeded")
        return "Exit due to cost limit", "exit_cost", "Exit due to cost limit"
    except RetryError as e:
        self.logger.warning(f"Retry error: {e}")
        return (
            f"Exit due to retry error: {e}",
            "exit_api",
            f"exit due to retry error: {e}",
        )
    return self.check_format_and_requery(output)

`get_environment_vars(env)`

Get environment variables

Source code in sweagent/agent/agents.py

def get_environment_vars(self, env: SWEEnv) -> dict[str, Any]:
    """Get environment variables"""
    assert self.config is not None  # mypy
    env_vars = dict()
    for var in self.config.env_variables:
        env_vars[var] = env.communicate(f"echo ${var}").strip()
    return env_vars

`retry_after_blocklist_fail(output, action)`

Ask the model to correct (without committing to persistent history) after a disallowed command

Source code in sweagent/agent/agents.py

def retry_after_blocklist_fail(self, output: str, action: str) -> str:
    """Ask the model to correct (without committing to persistent history) after a disallowed command"""
    name = action.strip().split()[0]
    blocklist_error_message = self.config.blocklist_error_template.format(name=name)

    self.logger.warning(f"BLOCKLISTED OUTPUT\n{output}")
    self.logger.warning(f"BLOCKLIST ERROR\n{blocklist_error_message}")

    temp_history = self.local_history + [
        {"role": "assistant", "content": output, "agent": self.name},
        {"role": "user", "content": blocklist_error_message, "agent": self.name},
    ]
    return self.model.query(temp_history)

`retry_after_format_fail(output)`

Ask the model to correct (without committing to persistent history) after a malformatted model output

Source code in sweagent/agent/agents.py

def retry_after_format_fail(self, output: str) -> str:
    """Ask the model to correct (without committing to persistent history) after a malformatted model output"""
    format_error_template = self.config.format_error_template

    self.logger.warning(f"MALFORMED OUTPUT\n{output}")
    self.logger.warning(f"FORMAT ERROR\n{format_error_template}")

    temp_history = self.local_history + [
        {"role": "assistant", "content": output, "agent": self.name},
        {"role": "user", "content": format_error_template, "agent": self.name},
    ]
    return self.model.query(temp_history)

`run(setup_args, env, observation=None, traj_dir=None, return_type='info_trajectory', init_model_stats=None)`

Run the agent on an environment. Return the final value of the specified return type.

Parameters:

Name	Type	Description	Default
`setup_args`	`dict[str, Any]`	Arguments to pass to the agent's setup method.	required
`env`	`SWEEnv`	The environment to run the agent on.	required
`observation`	`str \| None`	Output from environment setup	`None`
`traj_dir`	`Path \| None`	Directory to save the trajectory to	`None`
`return_type`	`str \| None`	Controls what to return. This should be left at `info_trajectory`, the other values are for internal usage with subroutines.	`'info_trajectory'`
`init_model_stats`	`APIStats \| None`	Initial model stats to use for the run.	`None`

Returns:

Type	Description
	If return_type is "info_trajectory", returns a tuple of
	the info dictionary and the trajectory (list of dictionaries).

Source code in sweagent/agent/agents.py

def run(
    self,
    setup_args: dict[str, Any],
    env: SWEEnv,
    observation: str | None = None,
    traj_dir: Path | None = None,
    return_type: str | None = "info_trajectory",
    init_model_stats: APIStats | None = None,
):
    """
    Run the agent on an environment.
    Return the final value of the specified return type.

    Args:
        setup_args: Arguments to pass to the agent's setup method.
        env: The environment to run the agent on.
        observation: Output from environment setup
        traj_dir: Directory to save the trajectory to
        return_type: Controls what to return.
            This should be left at `info_trajectory`, the
            other values are for internal usage with subroutines.
        init_model_stats: Initial model stats to use for the run.

    Returns:
        If return_type is "info_trajectory", returns a tuple of
        the info dictionary and the trajectory (list of dictionaries).
    """
    done = False
    # mypy checks
    assert env.container_obj is not None
    assert env.record is not None
    assert self.config is not None

    if env.container_obj.id != self.last_container_id:
        self.logger.info(f"Initializing agent settings for container {env.container_obj.id}")
        self.init_environment_vars(env)
        self.last_container_id = env.container_obj.id
    # Re-initialize primary
    self.setup(setup_args, init_model_stats)

    for hook in self.hooks:
        hook.on_run_start()

    # Run action/observation loop
    trajectory = []
    info = {}
    traj_log_path = traj_dir / (env.record["instance_id"] + ".traj")
    self.logger.info("Trajectory will be saved to %s", traj_log_path)
    while not done:
        for hook in self.hooks:
            hook.on_step_start()
        state = env.communicate(self.state_command) if self.state_command else None
        thought, action, output = self.forward(observation, env.get_available_actions(), state)
        for hook in self.hooks:
            hook.on_actions_generated(thought=thought, action=action, output=output)
        observations = list()
        run_action = self._guard_multiline_input(action)
        for sub_action in self.split_actions(run_action):
            if sub_action["agent"] == self.name or sub_action["cmd_name"] == self.config.submit_command:
                for hook in self.hooks:
                    hook.on_sub_action_started(sub_action=sub_action)
                obs, _, done, info = env.step(sub_action["action"])
                for hook in self.hooks:
                    hook.on_sub_action_executed(obs=obs, done=done)
                observations.append(obs)
                if sub_action["cmd_name"] == self.config.submit_command:
                    done = True
                if done:
                    break
            else:
                agent_name = sub_action["agent"]
                sub_agent_output = self.call_subroutine(agent_name, sub_action, env)
                observations.append(sub_agent_output)

        observation = "\n".join([obs for obs in observations if obs is not None])

        trajectory_step = TrajectoryStep(
            {
                "action": action,
                "observation": observation,
                "response": output,
                "state": state,
                "thought": thought,
            },
        )
        trajectory.append(trajectory_step)
        model_stats: APIStats = self.model.stats
        info["model_stats"] = model_stats.to_dict()
        if traj_dir:
            self.save_trajectory(trajectory, traj_log_path, env_name=env.name, info=info)
        for hook in self.hooks:
            hook.on_step_done(trajectory_step=trajectory_step, model_stats=model_stats)

    for hook in self.hooks:
        hook.on_run_done()

    self.logger.info("Trajectory saved to %s", traj_log_path)

    if return_type == "info":
        return info
    if return_type == "info_trajectory":
        return info, trajectory
    return trajectory[-1][return_type]

`save_trajectory(trajectory, log_path, env_name, info)`

Save the trajectory

Source code in sweagent/agent/agents.py

def save_trajectory(
    self, trajectory: list[dict[str, Any]], log_path: Path, env_name: str, info: dict[str, Any]
) -> None:
    """Save the trajectory"""
    log_dict = {
        "environment": env_name,
        "trajectory": trajectory,
        "history": self.history,
        "info": info,
    }
    log_path.write_text(json.dumps(log_dict, indent=2))

`setup(instance_args, init_model_stats=None)`

Setup the agent for a new instance. This includes formatting the system message and adding demonstrations to the history.

Parameters:

Name	Type	Description	Default
`instance_args`		Arguments for the instance	required

Source code in sweagent/agent/agents.py

def setup(self, instance_args, init_model_stats=None) -> None:
    """Setup the agent for a new instance. This includes
    formatting the system message and adding demonstrations to the history.

    Args:
        instance_args: Arguments for the instance
    """
    assert self.config is not None  # mypy
    self.model.reset_stats(init_model_stats)
    self.instance_args = instance_args

    system_msg = self.config.system_template.format(**self.system_args)
    self.logger.info(f"SYSTEM ({self.name})\n{system_msg}")

    self.history: list[dict[str, Any]] = []
    self._append_history({"role": "system", "content": system_msg, "agent": self.name})

    if "history_to_messages" in dir(self.model):
        for demonstration_path in self.config.demonstrations:
            if self.config.demonstration_template is None and not self.config.put_demos_in_history:
                msg = "Cannot use demonstrations without a demonstration template or put_demos_in_history=True"
                raise ValueError(msg)

            # Load history
            self.logger.info(f"DEMONSTRATION: {demonstration_path}")
            demo_history = json.loads(Path(demonstration_path).read_text())["history"]
            demo_history = [
                entry
                for entry in demo_history
                if ("agent" not in entry) or ("agent" in entry and entry["agent"] == self.name)
            ]

            if self.config.put_demos_in_history:
                if self.config.demonstration_template is not None:
                    self.logger.warning("Demonstration template is ignored for put_demos_in_history=True")
                # Add demonstration to history directly as separate messages
                for entry in demo_history:
                    if entry["role"] != "system":
                        entry["is_demo"] = True
                        self._append_history(entry)
            else:
                # Add demonstration as single message to history
                demo_message = self.model.history_to_messages(
                    demo_history,
                    is_demonstration=True,
                )
                demonstration = self.config.demonstration_template.format(demonstration=demo_message)
                self._append_history(
                    {
                        "agent": self.name,
                        "content": demonstration,
                        "is_demo": True,
                        "role": "user",
                    },
                )

`should_block_action(action)`

Check if the command should be blocked.

Source code in sweagent/agent/agents.py

def should_block_action(self, action: str) -> bool:
    """Check if the command should be blocked."""
    names = action.strip().split()
    if len(names) == 0:
        return False
    name = names[0]
    if name in self.config.blocklist:
        return True
    if name in self.config.blocklist_standalone and name == action.strip():
        return True
    return False

`split_actions(action, pattern_type='subroutine')`

Split an action into a list of actions in a greedy manner, each of which is a subroutine call or a single command.

Source code in sweagent/agent/agents.py

def split_actions(self, action: str, pattern_type="subroutine") -> list[dict[str, Any]]:
    """Split an action into a list of actions in a greedy manner, each of which is a subroutine call or a single command."""
    parsed_action = list()
    rem_action = action
    while rem_action.strip():
        first_match = self._get_first_match(rem_action, pattern_type)
        if first_match:
            pre_action = rem_action[: first_match.start()]
            match_action = rem_action[first_match.start() : first_match.end()]
            rem_action = rem_action[first_match.end() :]
            if pre_action.strip():
                parsed_action.append({"agent": self.name, "action": pre_action, "cmd_name": None})
            if match_action.strip():
                if match_action.split()[0] == self.config.submit_command:
                    parsed_action.append(
                        {
                            "agent": self.name,
                            "action": match_action,
                            "cmd_name": first_match.group(1),
                        },
                    )  # submit command is not a subroutine
                else:
                    parsed_action.append(
                        {
                            "agent": first_match.group(1),
                            "args": first_match.group(2),
                            "action": match_action,
                            "cmd_name": first_match.group(1),
                        },
                    )
        else:
            parsed_action.append({"agent": self.name, "action": rem_action, "cmd_name": None})
            rem_action = ""
    return parsed_action

`AgentArguments` `dataclass`

Bases: FlattenedAccess, FrozenSerializable

Configure the agent's behaviour (templates, parse functions, blocklists, ...).

Source code in sweagent/agent/agents.py

@dataclass(frozen=True)
class AgentArguments(FlattenedAccess, FrozenSerializable):
    """Configure the agent's behaviour (templates, parse functions, blocklists, ...)."""

    model: ModelArguments = None

    # Policy can only be set via config yaml file from command line
    config_file: Path | None = None
    config: AgentConfig | None = field(default=None, cmd=False)

    def __post_init__(self):
        if self.config is None and self.config_file is not None:
            # If unassigned, we load the config from the file to store its contents with the overall arguments
            config = AgentConfig.load_yaml(self.config_file)
            object.__setattr__(self, "config", config)
        assert self.config is not None  # mypy
        for subroutine in getattr(self.config, "subroutines", {}).values():
            model_args = subroutine.model
            object.__setattr__(
                model_args,
                "per_instance_cost_limit",
                self.model.per_instance_cost_limit,
            )
            object.__setattr__(model_args, "total_cost_limit", self.model.total_cost_limit)

`AgentHook`

Source code in sweagent/agent/agents.py

class AgentHook:
    def on_init(self): ...

    def on_run_start(
        self,
    ): ...

    def on_step_start(self): ...

    def on_actions_generated(self, *, thought: str, action: str, output: str): ...

    def on_sub_action_started(self, *, sub_action: str): ...

    def on_sub_action_executed(self, *, obs: str, done: bool): ...

    def on_step_done(self, *, trajectory_step: TrajectoryStep, model_stats: APIStats): ...

    def on_run_done(self): ...

    def on_model_query(self, *, query: str, agent: str):
        """Actually query the model with the complete history."""

    def on_query_message_added(
        self,
        *,
        role: str,
        content: str,
        agent: str,
        is_demo: bool = False,
        thought: str = "",
        action: str = "",
    ): ...

`on_model_query(*, query, agent)`

Actually query the model with the complete history.

Source code in sweagent/agent/agents.py

def on_model_query(self, *, query: str, agent: str):
    """Actually query the model with the complete history."""

The agent class

Agent

local_history: list[dict[str, str]] property

state_command: str property

add_hook(hook)

call_subroutine(agent_name, sub_action, env)

check_format_and_requery(output)

forward(observation, available_actions, state)

forward_model(observation, state)

forward_with_error_check(observation, state)

get_environment_vars(env)

retry_after_blocklist_fail(output, action)

retry_after_format_fail(output)

run(setup_args, env, observation=None, traj_dir=None, return_type='info_trajectory', init_model_stats=None)

save_trajectory(trajectory, log_path, env_name, info)

setup(instance_args, init_model_stats=None)

should_block_action(action)

split_actions(action, pattern_type='subroutine')

AgentArguments dataclass

AgentHook

on_model_query(*, query, agent)

`Agent`

`local_history: list[dict[str, str]]` `property`

`state_command: str` `property`

`add_hook(hook)`

`call_subroutine(agent_name, sub_action, env)`

`check_format_and_requery(output)`

`forward(observation, available_actions, state)`

`forward_model(observation, state)`

`forward_with_error_check(observation, state)`

`get_environment_vars(env)`

`retry_after_blocklist_fail(output, action)`

`retry_after_format_fail(output)`

`run(setup_args, env, observation=None, traj_dir=None, return_type='info_trajectory', init_model_stats=None)`

`save_trajectory(trajectory, log_path, env_name, info)`

`setup(instance_args, init_model_stats=None)`

`should_block_action(action)`

`split_actions(action, pattern_type='subroutine')`

`AgentArguments` `dataclass`

`AgentHook`

`on_model_query(*, query, agent)`