OpenRouterProgram/api_module.py at master · GearUnclear/OpenRouterProgram · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# api_module.py

import requests
import json
import win32cred

def get_api_key(credential_name: str) -> str:
    """
    Retrieve the API key from Windows Credential Manager.

    Args:
        credential_name (str): The name of the credential to retrieve.

    Returns:
        str: The decoded API key.

    Raises:
        Exception: If the credential cannot be read or decoded. The
            underlying error is attached as the exception's ``__cause__``.
    """
    try:
        credential = win32cred.CredRead(
            TargetName=credential_name,
            Type=win32cred.CRED_TYPE_GENERIC
        )
        # The stored blob is decoded as UTF-16 text to obtain the key.
        return credential['CredentialBlob'].decode('utf-16')
    except Exception as e:
        # Chain explicitly so the original failure stays visible as the cause.
        raise Exception(f"Error retrieving API key: {e}") from e

def _stream_chat_completion(url: str, headers: dict, payload: dict, model: str):
    """
    Send a streaming chat-completion request and yield content deltas.

    This is a generator: the HTTP request is only sent once iteration starts,
    and any failure is raised from the iteration, not from the call.

    Args:
        url (str): The endpoint to POST to.
        headers (dict): HTTP headers for the request.
        payload (dict): JSON body for the request.
        model (str): Model name, used only in error messages.

    Yields:
        str: Each non-null ``content`` value found in a chunk's first choice delta.

    Raises:
        Exception: If the request fails.
    """
    try:
        with requests.post(url, headers=headers, json=payload, stream=True) as response:
            response.raise_for_status()  # Raises HTTPError for bad responses

            for raw_line in response.iter_lines():
                if not raw_line:
                    continue

                line = raw_line.decode("utf-8").strip()

                # Strip the event-stream field name BEFORE testing for the
                # sentinel, so that "data: [DONE]" is recognised.
                if line.startswith("data:"):
                    line = line[len("data:"):].strip()

                if line == "[DONE]":
                    break

                try:
                    chunk_data = json.loads(line)
                except json.JSONDecodeError:
                    # Lines that are not JSON (e.g. keep-alive comments) are skipped.
                    continue

                if not isinstance(chunk_data, dict):
                    continue

                # A chunk may have no choices, or an empty list; skip it
                # instead of failing on choices[0].
                choices = chunk_data.get("choices")
                if not choices:
                    continue

                delta = choices[0].get("delta") or {}
                content = delta.get("content")
                # Skip null content so consumers only ever receive strings.
                if content is not None:
                    yield content
    except requests.exceptions.RequestException as e:
        raise Exception(f"API request failed for model '{model}': {e}") from e


def make_api_request(api_key: str, message_history: list, model: str, temperature: float = 1.0, stream: bool = False, context_length: int | None = None, max_completion_tokens: int | None = None, reasoning_effort: str | None = None, reasoning_max_tokens: int | None = None, exclude_reasoning: bool = False):
    """
    Make a POST request to the OpenRouter API for a specific model.

    Args:
        api_key (str): The API key for authorization.
        message_history (list): The conversation history.
        model (str): The AI model to use for generating a response.
        temperature (float, optional): Sampling temperature. Defaults to 1.0.
        stream (bool, optional): Whether to stream the response in chunks.
        context_length (int, optional): Sent as the ``max_tokens`` field of the
            request payload when provided. Defaults to None.
        max_completion_tokens (int, optional): The maximum number of tokens for the completion. Defaults to None.
        reasoning_effort (str, optional): The reasoning effort level ("high", "medium", "low"). Defaults to None.
        reasoning_max_tokens (int, optional): The maximum tokens for reasoning. Defaults to None.
        exclude_reasoning (bool, optional): Whether to exclude reasoning tokens from response. Defaults to False.

    Returns:
        dict: The JSON response from the API if stream is False.
        generator of str: If stream is True, a generator yielding the content
            chunks of the AI response. The request is sent when iteration
            begins, and request errors are raised during iteration.

    Raises:
        Exception: If the request fails or the response is invalid.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": message_history,
        "temperature": temperature,
        "stream": stream
    }

    # Add context_length and max_completion_tokens to payload if provided
    if context_length is not None:
        payload["max_tokens"] = context_length
    if max_completion_tokens is not None:
        payload["max_completion_tokens"] = max_completion_tokens

    # Add reasoning parameters if needed
    if reasoning_effort is not None or reasoning_max_tokens is not None or exclude_reasoning:
        payload["reasoning"] = {}
        if reasoning_effort:
            payload["reasoning"]["effort"] = reasoning_effort
        if reasoning_max_tokens:
            payload["reasoning"]["max_tokens"] = reasoning_max_tokens
        if exclude_reasoning:
            payload["reasoning"]["exclude"] = True

    if stream:
        # The streaming loop lives in its own generator so that this function
        # is a plain function and the non-streaming path can return a dict.
        return _stream_chat_completion(url, headers, payload, model)

    try:
        with requests.post(url, headers=headers, json=payload, stream=False) as response:
            response.raise_for_status()  # Raises HTTPError for bad responses
            # Non-streaming: return the full JSON response
            return response.json()
    except requests.exceptions.RequestException as e:
        raise Exception(f"API request failed for model '{model}': {e}") from e
    except json.JSONDecodeError as e:
        raise Exception("Failed to decode JSON response.") from e