OpenAI Agents SDK 완전 가이드: 차세대 AI 에이전트 개발하기

AI 에이전트는 더 이상 먼 미래의 이야기가 아닙니다. OpenAI가 2025년 3월에 발표한 Agents SDK를 통해 누구나 강력한 AI 에이전트를 쉽게 개발할 수 있게 되었습니다. 이 가이드에서는 기초부터 실제 애플리케이션 구축까지 모든 과정을 상세히 다뤄보겠습니다.

OpenAI Agents SDK란?

OpenAI Agents SDK는 LLM을 활용한 자율적인 AI 에이전트를 구축하기 위한 Python 프레임워크입니다. 기존의 단순한 챗봇이나 LLM 애플리케이션과 달리, 에이전트는 복잡한 워크플로우를 독립적으로 수행할 수 있습니다.

에이전트의 핵심 특징

자율적 워크플로우 실행: 사용자의 목표를 이해하고 독립적으로 작업을 완료
동적 도구 선택: 상황에 따라 적절한 도구를 선택하고 활용
실시간 의사결정: 실행 중 오류나 예외 상황에 대처
다중 에이전트 협업: 필요시 다른 에이전트와 협력하여 작업 수행

설치 및 환경 설정

사전 요구사항

# Python 3.10 이상 필요
python --version

# 가상환경 생성 (권장)
python -m venv agents-env
source agents-env/bin/activate  # Linux/macOS
# agents-env\Scripts\activate  # Windows

SDK 설치

# 기본 설치
pip install openai-agents

# 추가 의존성과 함께 설치 (권장)
pip install openai-agents python-dotenv

환경 변수 설정

# .env 파일 생성
OPENAI_API_KEY=your_openai_api_key_here

핵심 구성 요소

1. Agent 클래스

에이전트의 핵심이 되는 클래스입니다:

from agents import Agent

agent = Agent(
    name="assistant",
    instructions="당신은 도움이 되는 AI 어시스턴트입니다.",
    tools=[],  # 사용 가능한 도구들
    handoffs=[],  # 다른 에이전트들
    model="gpt-4"  # 사용할 모델
)

2. Runner 클래스

에이전트 실행을 관리하는 클래스입니다:

from agents import Runner
import asyncio

# 비동기 실행
result = await Runner.run(agent, "안녕하세요!")

# 동기 실행
result = Runner.run_sync(agent, "안녕하세요!")

3. Tools (도구)

에이전트가 외부 시스템과 상호작용할 수 있게 해주는 기능입니다:

from agents import function_tool

@function_tool
def get_weather(city: str) -> str:
    """특정 도시의 날씨 정보를 가져옵니다.
    
    Args:
        city: 날씨를 확인할 도시명
    
    Returns:
        날씨 정보 문자열
    """
    # 실제 API 호출 로직
    return f"{city}의 현재 날씨는 맑음입니다."

기본 에이전트 구축하기

1. 단순한 대화 에이전트

import asyncio
import os
from dotenv import load_dotenv
from agents import Agent, Runner

# 환경 변수 로드
load_dotenv()

# 기본 에이전트 생성
basic_agent = Agent(
    name="chat_assistant",
    instructions="""
    당신은 친근하고 도움이 되는 AI 어시스턴트입니다.
    사용자의 질문에 정확하고 간결하게 답변해주세요.
    """
)

async def main():
    # 에이전트 실행
    result = await Runner.run(
        basic_agent,
        "파이썬에서 리스트와 튜플의 차이점을 설명해주세요."
    )
    
    print(result.final_output)

# 실행
asyncio.run(main())

2. 구조화된 응답 에이전트

from pydantic import BaseModel
from agents import Agent, Runner

class CodeExplanation(BaseModel):
    language: str
    concept: str
    explanation: str
    example: str
    difficulty: str

structured_agent = Agent(
    name="code_tutor",
    instructions="""
    당신은 프로그래밍 튜터입니다. 
    사용자의 질문에 대해 구조화된 설명을 제공해주세요.
    """,
    output_type=CodeExplanation
)

async def explain_concept():
    result = await Runner.run(
        structured_agent,
        "파이썬의 리스트 컴프리헨션에 대해 설명해주세요."
    )
    
    explanation = result.final_output
    print(f"언어: {explanation.language}")
    print(f"개념: {explanation.concept}")
    print(f"설명: {explanation.explanation}")
    print(f"예제: {explanation.example}")
    print(f"난이도: {explanation.difficulty}")

도구(Tools) 개발하기

1. 기본 도구 생성

import datetime
import requests
from agents import function_tool

@function_tool
def get_current_time() -> str:
    """현재 시간을 반환합니다."""
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

@function_tool
def search_web(query: str, limit: int = 5) -> list:
    """웹에서 정보를 검색합니다.
    
    Args:
        query: 검색 쿼리
        limit: 결과 개수 제한
    
    Returns:
        검색 결과 리스트
    """
    # 실제 검색 API 호출 (예: DuckDuckGo, Google 등)
    # 여기서는 예시 데이터 반환
    return [
        {"title": f"검색 결과 {i+1}", "url": f"https://example.com/{i+1}"}
        for i in range(limit)
    ]

@function_tool
def save_note(content: str, filename: str = None) -> str:
    """노트를 파일로 저장합니다.
    
    Args:
        content: 저장할 내용
        filename: 파일명 (선택사항)
    
    Returns:
        저장 결과 메시지
    """
    if not filename:
        filename = f"note_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
    
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(content)
        return f"노트가 {filename}에 성공적으로 저장되었습니다."
    except Exception as e:
        return f"저장 중 오류 발생: {str(e)}"

2. 도구를 활용하는 에이전트

assistant_agent = Agent(
    name="assistant_with_tools",
    instructions="""
    당신은 다양한 도구를 활용할 수 있는 AI 어시스턴트입니다.
    
    사용 가능한 도구:
    - get_current_time: 현재 시간 확인
    - search_web: 웹 검색
    - save_note: 노트 저장
    
    사용자의 요청에 따라 적절한 도구를 선택하여 사용하세요.
    """,
    tools=[get_current_time, search_web, save_note]
)

async def demo_tools():
    # 시간 확인
    result1 = await Runner.run(assistant_agent, "지금 몇 시인가요?")
    print("시간 확인:", result1.final_output)
    
    # 웹 검색
    result2 = await Runner.run(assistant_agent, "파이썬 학습 자료를 검색해주세요.")
    print("검색 결과:", result2.final_output)
    
    # 노트 저장
    result3 = await Runner.run(
        assistant_agent, 
        "오늘 학습한 OpenAI Agents SDK 내용을 정리해서 저장해주세요."
    )
    print("노트 저장:", result3.final_output)

멀티 에이전트 시스템 구축

1. Manager 패턴

중앙 관리자 에이전트가 여러 전문 에이전트를 조율하는 패턴입니다:

from agents import Agent, Runner

# 전문 에이전트들 생성
code_reviewer = Agent(
    name="code_reviewer",
    instructions="""
    당신은 코드 리뷰 전문가입니다.
    코드의 품질, 성능, 보안 측면을 검토하고 개선사항을 제안합니다.
    """,
    tools=[save_note]
)

documentation_writer = Agent(
    name="doc_writer",
    instructions="""
    당신은 기술 문서 작성 전문가입니다.
    코드에 대한 명확하고 포괄적인 문서를 작성합니다.
    """,
    tools=[save_note]
)

# 매니저 에이전트
manager_agent = Agent(
    name="project_manager",
    instructions="""
    당신은 소프트웨어 개발 프로젝트 매니저입니다.
    
    코드 관련 요청이 들어오면:
    1. 코드 리뷰가 필요하면 code_reviewer에게 위임
    2. 문서화가 필요하면 doc_writer에게 위임
    3. 종합적인 관리가 필요하면 두 에이전트를 모두 활용
    
    각 에이전트의 결과를 종합하여 최종 결과를 제시합니다.
    """,
    tools=[
        code_reviewer.as_tool(
            tool_name="review_code",
            tool_description="코드 리뷰를 수행합니다"
        ),
        documentation_writer.as_tool(
            tool_name="write_documentation",
            tool_description="기술 문서를 작성합니다"
        )
    ]
)

async def demo_manager_pattern():
    result = await Runner.run(
        manager_agent,
        """
        다음 Python 함수를 리뷰하고 문서화해주세요:
        
        def fibonacci(n):
            if n <= 1:
                return n
            return fibonacci(n-1) + fibonacci(n-2)
        """
    )
    print(result.final_output)

2. Handoff 패턴

에이전트들이 서로 직접 작업을 넘겨주는 패턴입니다:

# 고객 지원 에이전트들
technical_support = Agent(
    name="technical_support",
    instructions="""
    당신은 기술 지원 전문가입니다.
    기술적인 문제 해결과 시스템 장애 대응을 담당합니다.
    """,
    tools=[search_web, save_note]
)

sales_agent = Agent(
    name="sales_agent",
    instructions="""
    당신은 영업 담당자입니다.
    제품 문의, 구매 상담, 견적 제공을 담당합니다.
    """,
    tools=[search_web, save_note]
)

billing_agent = Agent(
    name="billing_agent", 
    instructions="""
    당신은 결제 및 청구 담당자입니다.
    결제 문제, 환불, 구독 관리를 담당합니다.
    """,
    tools=[search_web, save_note]
)

# 트리아지 에이전트 (첫 접점)
triage_agent = Agent(
    name="customer_service_triage",
    instructions="""
    당신은 고객 서비스 트리아지 담당자입니다.
    고객의 문의를 분석하여 적절한 전문가에게 연결합니다:
    
    - 기술적 문제: technical_support로 연결
    - 구매/영업 문의: sales_agent로 연결  
    - 결제/청구 문의: billing_agent로 연결
    """,
    handoffs=[technical_support, sales_agent, billing_agent]
)

# 역방향 연결 설정
technical_support.handoffs = [triage_agent]
sales_agent.handoffs = [triage_agent]
billing_agent.handoffs = [triage_agent]

async def demo_handoff_pattern():
    result = await Runner.run(
        triage_agent,
        "결제가 실패했는데 환불 받을 수 있나요?"
    )
    print(result.final_output)

고급 기능 활용하기

1. 컨텍스트 관리

에이전트 간 정보 공유를 위한 컨텍스트 시스템:

from pydantic import BaseModel
from agents import RunContextWrapper

class ProjectContext(BaseModel):
    project_name: str
    team_members: list = []
    tasks: list = []
    status: str = "진행중"

@function_tool
def add_team_member(ctx: RunContextWrapper[ProjectContext], name: str, role: str) -> str:
    """프로젝트에 팀원을 추가합니다."""
    context = ctx.get()
    context.team_members.append({"name": name, "role": role})
    ctx.set(context)
    return f"{name}({role})이 프로젝트에 추가되었습니다."

@function_tool
def add_task(ctx: RunContextWrapper[ProjectContext], task: str, assignee: str = None) -> str:
    """프로젝트에 작업을 추가합니다."""
    context = ctx.get()
    context.tasks.append({"task": task, "assignee": assignee, "status": "대기중"})
    ctx.set(context)
    return f"작업이 추가되었습니다: {task}"

project_manager = Agent(
    name="project_manager",
    instructions="""
    당신은 프로젝트 매니저입니다.
    팀원 관리, 작업 할당, 프로젝트 진행 상황 추적을 담당합니다.
    """,
    tools=[add_team_member, add_task]
)

async def demo_context():
    # 컨텍스트 생성
    context = ProjectContext(project_name="AI 챗봇 개발")
    
    # 컨텍스트와 함께 에이전트 실행
    result = await Runner.run(
        project_manager,
        "김개발자를 백엔드 개발자로 팀에 추가하고, 'API 설계' 작업을 할당해주세요.",
        context=context
    )
    
    print(result.final_output)
    print("업데이트된 컨텍스트:", context)

2. 가드레일(Guardrails) 구현

에이전트의 안전하고 예측 가능한 동작을 보장하는 안전장치:

from agents import (
    Agent, Runner, input_guardrail, output_guardrail,
    GuardrailFunctionOutput, GuardrailTripwireTriggered
)

# 입력 가드레일
@input_guardrail
async def inappropriate_content_filter(
    ctx, agent: Agent, input_text: str
) -> GuardrailFunctionOutput:
    """부적절한 내용을 필터링합니다."""
    inappropriate_keywords = ["욕설", "비방", "폭력"]
    
    for keyword in inappropriate_keywords:
        if keyword in input_text:
            return GuardrailFunctionOutput(
                output_info=f"부적절한 내용이 감지되었습니다: {keyword}",
                tripwire_triggered=True
            )
    
    return GuardrailFunctionOutput(
        output_info="입력이 적절합니다.",
        tripwire_triggered=False
    )

# 출력 가드레일
@output_guardrail
async def response_length_limit(
    ctx, agent: Agent, output: str
) -> GuardrailFunctionOutput:
    """응답 길이를 제한합니다."""
    max_length = 1000
    
    if len(output) > max_length:
        return GuardrailFunctionOutput(
            output_info=f"응답이 너무 깁니다 ({len(output)}자). 최대 {max_length}자까지 허용됩니다.",
            tripwire_triggered=True
        )
    
    return GuardrailFunctionOutput(
        output_info="응답 길이가 적절합니다.",
        tripwire_triggered=False
    )

safe_agent = Agent(
    name="safe_assistant",
    instructions="안전한 AI 어시스턴트입니다.",
    input_guardrails=[inappropriate_content_filter],
    output_guardrails=[response_length_limit]
)

async def demo_guardrails():
    try:
        result = await Runner.run(safe_agent, "일반적인 질문입니다.")
        print("정상 응답:", result.final_output)
    except GuardrailTripwireTriggered as e:
        print("가드레일 발동:", str(e))

3. 스트리밍 및 추적

실시간 응답과 디버깅을 위한 기능:

from agents import Runner, RunConfig

async def demo_streaming():
    agent = Agent(
        name="streaming_agent",
        instructions="단계별로 상세한 설명을 제공하는 어시스턴트입니다."
    )
    
    # 스트리밍 설정
    config = RunConfig(
        workflow_name="스트리밍 데모",
        tracing_disabled=False
    )
    
    # 스트리밍 실행
    async for chunk in Runner.run_streamed(
        agent, 
        "파이썬으로 웹 스크래핑을 하는 방법을 단계별로 설명해주세요.",
        run_config=config
    ):
        print("스트림 청크:", chunk)

MCP (Model Context Protocol) 통합

1. MCP 서버 연결

외부 도구와의 표준화된 연결을 위한 MCP 프로토콜 활용:

from agents.mcp import MCPServerSse

async def demo_mcp():
    # MCP 서버 연결 (예: GitHub 연동)
    github_server = MCPServerSse({
        "url": "https://mcp.composio.dev/github/your-instance-id"
    })
    
    try:
        await github_server.connect()
        
        github_agent = Agent(
            name="github_agent",
            instructions="""
            당신은 GitHub 관리 전문가입니다.
            이슈 생성, 풀 리퀘스트 관리, 저장소 관리를 담당합니다.
            """,
            mcp_servers=[github_server]
        )
        
        result = await Runner.run(
            github_agent,
            "저장소 'my-project'에 '버그 수정' 제목으로 이슈를 생성해주세요."
        )
        
        print(result.final_output)
        
    finally:
        await github_server.cleanup()

실제 활용 사례

1. 이메일 관리 시스템

class EmailManagementSystem:
    def __init__(self):
        self.classifier_agent = Agent(
            name="email_classifier",
            instructions="""
            이메일을 다음 카테고리로 분류합니다:
            - urgent: 긴급 처리 필요
            - important: 중요하지만 긴급하지 않음
            - routine: 일반적인 업무 이메일
            - spam: 스팸 또는 불필요한 이메일
            """,
            tools=[self._classify_email]
        )
        
        self.responder_agent = Agent(
            name="email_responder", 
            instructions="""
            분류된 이메일에 대해 적절한 응답을 생성합니다.
            긴급한 이메일은 즉시 처리하고, 일반 이메일은 정중한 응답을 생성합니다.
            """,
            tools=[self._send_email, self._draft_response]
        )
    
    @function_tool
    def _classify_email(self, subject: str, body: str) -> dict:
        """이메일을 분류합니다."""
        # 실제 분류 로직
        return {"category": "important", "priority": 8}
    
    @function_tool  
    def _send_email(self, to: str, subject: str, body: str) -> str:
        """이메일을 발송합니다."""
        # 실제 이메일 발송 로직
        return f"이메일이 {to}에게 발송되었습니다."
    
    @function_tool
    def _draft_response(self, original_email: str) -> str:
        """응답 초안을 작성합니다."""
        # 응답 생성 로직
        return "정중한 응답 초안이 생성되었습니다."

    async def process_email(self, email_data):
        # 1. 이메일 분류
        classification = await Runner.run(
            self.classifier_agent,
            f"제목: {email_data['subject']}\n내용: {email_data['body']}"
        )
        
        # 2. 적절한 응답 생성
        response = await Runner.run(
            self.responder_agent,
            f"분류: {classification.final_output}\n원본 이메일: {email_data}"
        )
        
        return response.final_output

2. 코드 리뷰 자동화 시스템

class CodeReviewSystem:
    def __init__(self):
        self.security_reviewer = Agent(
            name="security_reviewer",
            instructions="코드의 보안 취약점을 검토합니다.",
            tools=[self._check_security]
        )
        
        self.performance_reviewer = Agent(
            name="performance_reviewer", 
            instructions="코드의 성능 최적화 가능성을 검토합니다.",
            tools=[self._analyze_performance]
        )
        
        self.style_reviewer = Agent(
            name="style_reviewer",
            instructions="코딩 스타일과 가독성을 검토합니다.",
            tools=[self._check_style]
        )
        
        self.coordinator = Agent(
            name="review_coordinator",
            instructions="모든 리뷰 결과를 종합하여 최종 보고서를 작성합니다.",
            handoffs=[self.security_reviewer, self.performance_reviewer, self.style_reviewer]
        )
    
    @function_tool
    def _check_security(self, code: str) -> dict:
        """보안 검토를 수행합니다."""
        # 실제 보안 분석 로직
        return {"vulnerabilities": [], "risk_level": "low"}
    
    @function_tool
    def _analyze_performance(self, code: str) -> dict:
        """성능 분석을 수행합니다."""
        # 실제 성능 분석 로직
        return {"bottlenecks": [], "optimization_suggestions": []}
    
    @function_tool
    def _check_style(self, code: str) -> dict:
        """스타일 검토를 수행합니다."""
        # 실제 스타일 검토 로직
        return {"style_issues": [], "readability_score": 8.5}

    async def review_code(self, code: str, file_path: str):
        result = await Runner.run(
            self.coordinator,
            f"다음 코드를 전체적으로 리뷰해주세요:\n\n파일: {file_path}\n\n{code}"
        )
        return result.final_output

모니터링 및 디버깅

1. 로깅 및 추적

import logging
from agents import RunConfig

# 로깅 설정
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

async def monitored_execution():
    config = RunConfig(
        workflow_name="모니터링 데모",
        tracing_disabled=False,
        max_turns=10  # 최대 실행 횟수 제한
    )
    
    agent = Agent(
        name="monitored_agent",
        instructions="모니터링되는 에이전트입니다."
    )
    
    try:
        result = await Runner.run(
            agent,
            "복잡한 작업을 수행해주세요.",
            run_config=config
        )
        
        logger.info(f"작업 완료: {result.final_output}")
        
        # 추적 정보 확인 (OpenAI Platform에서 확인 가능)
        print("추적 정보는 https://platform.openai.com/traces 에서 확인할 수 있습니다.")
        
    except Exception as e:
        logger.error(f"실행 중 오류 발생: {str(e)}")

2. 성능 최적화

import time
from typing import List

class PerformanceOptimizedAgent:
    def __init__(self):
        # 모델 선택 최적화
        self.fast_agent = Agent(
            name="fast_agent",
            instructions="빠른 응답이 필요한 간단한 작업을 처리합니다.",
            model="gpt-3.5-turbo"  # 빠르고 저렴한 모델
        )
        
        self.smart_agent = Agent(
            name="smart_agent", 
            instructions="복잡한 추론이 필요한 작업을 처리합니다.",
            model="gpt-4"  # 더 능력 있는 모델
        )
    
    async def process_batch(self, tasks: List[str]):
        """배치 처리로 효율성 증대"""
        results = []
        
        for task in tasks:
            # 작업 복잡도에 따라 에이전트 선택
            if self._is_simple_task(task):
                agent = self.fast_agent
            else:
                agent = self.smart_agent
            
            start_time = time.time()
            result = await Runner.run(agent, task)
            end_time = time.time()
            
            results.append({
                "task": task,
                "result": result.final_output,
                "processing_time": end_time - start_time,
                "model_used": agent.model
            })
        
        return results
    
    def _is_simple_task(self, task: str) -> bool:
        """작업의 복잡도를 판단합니다."""
        simple_keywords = ["안녕", "시간", "날씨", "간단한"]
        return any(keyword in task for keyword in simple_keywords)

모범 사례 및 팁

1. 에이전트 설계 원칙

# ✅ 좋은 예시
good_agent = Agent(
    name="customer_support",  # 명확한 이름
    instructions="""
    당신은 고객 지원 전문가입니다.
    
    주요 역할:
    1. 고객 문의사항을 정확히 파악
    2. 적절한 해결책 제시
    3. 필요시 전문가에게 에스컬레이션
    
    대응 원칙:
    - 친절하고 전문적인 톤 유지
    - 구체적이고 실행 가능한 답변 제공
    - 불확실한 경우 명확히 표현
    """,  # 구체적이고 명확한 지침
    tools=[search_knowledge_base, escalate_to_human],  # 필요한 도구만 제공
    model="gpt-4"  # 적절한 모델 선택
)

# ❌ 피해야 할 예시  
bad_agent = Agent(
    name="agent",  # 모호한 이름
    instructions="도움을 주세요.",  # 불명확한 지침
    tools=all_available_tools,  # 불필요한 도구들까지 모두 포함
    model="gpt-4"  # 모든 작업에 가장 비싼 모델 사용
)

2. 오류 처리 및 복구

async def robust_agent_execution():
    agent = Agent(
        name="robust_agent",
        instructions="안정적인 처리를 위한 에이전트입니다."
    )
    
    max_retries = 3
    retry_count = 0
    
    while retry_count < max_retries:
        try:
            result = await Runner.run(agent, "작업을 수행해주세요.")
            return result.final_output
            
        except Exception as e:
            retry_count += 1
            logger.warning(f"시도 {retry_count} 실패: {str(e)}")
            
            if retry_count >= max_retries:
                logger.error("최대 재시도 횟수 초과")
                raise e
            
            # 재시도 전 잠시 대기
            await asyncio.sleep(2 ** retry_count)  # 지수적 백오프
    
    raise Exception("모든 재시도 실패")

3. 테스트 전략

import pytest
from unittest.mock import AsyncMock

class TestAgentSystem:
    @pytest.fixture
    def mock_agent(self):
        agent = Agent(
            name="test_agent",
            instructions="테스트용 에이전트입니다."
        )
        return agent
    
    @pytest.mark.asyncio
    async def test_basic_response(self, mock_agent):
        """기본 응답 테스트"""
        result = await Runner.run(mock_agent, "안녕하세요")
        assert result.final_output is not None
        assert len(result.final_output) > 0
    
    @pytest.mark.asyncio
    async def test_tool_usage(self, mock_agent):
        """도구 사용 테스트"""
        # 모킹된 도구 설정
        mock_tool = AsyncMock(return_value="모킹된 결과")
        mock_agent.tools = [mock_tool]
        
        result = await Runner.run(mock_agent, "도구를 사용해주세요")
        
        # 도구가 호출되었는지 확인
        assert mock_tool.called

결론

OpenAI Agents SDK는 AI 에이전트 개발의 새로운 패러다임을 제시합니다. 이 가이드에서 다룬 내용들을 통해:

핵심 장점

간단한 시작: 몇 줄의 코드로 강력한 에이전트 구축 가능
확장성: 단일 에이전트부터 복잡한 멀티 에이전트 시스템까지
유연성: 다양한 도구와 외부 시스템 통합 지원
안정성: 가드레일과 모니터링을 통한 안전한 운영

다음 단계

프로덕션 환경에서의 스케일링 전략
다른 AI 프레임워크와의 통합
에이전트 성능 최적화 및 비용 관리
엔터프라이즈급 보안 및 규정 준수

AI 에이전트는 이제 실험실의 개념이 아닌 실용적인 도구입니다. 이 SDK를 활용하여 여러분만의 혁신적인 AI 솔루션을 구축해보세요.

참고 링크: