fix(client,exporter): handle HTTP 429 retry and sanitize JSON
`_get_with_retry` now retries on HTTP 429 responses, respecting the Retry-After header when present. The exporter sanitizes control characters (0x00-0x1F except \n, \r and \t) in text fields before JSON serialization.

Fixes #11
Fixes #12

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -33,19 +33,42 @@ class GiteaClient:
|
||||
self.session.headers["Authorization"] = f"token {token}"
|
||||
|
||||
def _get_with_retry(self, url: str, params: dict | None = None) -> requests.Response:
    """GET with automatic retry on timeout and on rate limiting (HTTP 429).

    Makes up to ``_MAX_RETRIES + 1`` attempts. Between attempts it waits
    ``_RETRY_DELAY * (attempt + 1)`` seconds (linear backoff), unless a 429
    response carries a usable ``Retry-After`` header, in which case that
    number of seconds is used instead.

    ``Retry-After`` is only honoured in its delay-seconds form. The HTTP-date
    form, a negative value, or a non-finite value falls back to the linear
    backoff rather than raising from ``float()`` or ``time.sleep()``.

    Args:
        url: Absolute URL to request.
        params: Optional query-string parameters passed to ``session.get``.

    Returns:
        The first response whose status code is not 429.

    Raises:
        requests.Timeout: The final attempt timed out.
        requests.HTTPError: The final attempt was answered with HTTP 429.
    """
    last_exc: requests.Timeout | None = None
    last_resp: requests.Response | None = None

    for attempt in range(self._MAX_RETRIES + 1):
        # Default wait before the next attempt: linear backoff.
        delay = self._RETRY_DELAY * (attempt + 1)
        try:
            resp = self.session.get(url, params=params, timeout=self.timeout)
        except requests.Timeout as exc:
            # Remember only the most recent failure so the error raised after
            # exhaustion reflects what actually happened last.
            last_exc, last_resp = exc, None
        else:
            if resp.status_code != 429:
                return resp
            last_exc, last_resp = None, resp
            retry_after = resp.headers.get("Retry-After")
            if retry_after is not None:
                try:
                    requested = float(retry_after)
                except (TypeError, ValueError):
                    # HTTP-date form or garbage: keep the linear backoff.
                    pass
                else:
                    # Rejects negatives, inf and NaN (NaN fails both comparisons).
                    if 0 <= requested < float("inf"):
                        delay = requested

        # No point sleeping after the last attempt.
        if attempt < self._MAX_RETRIES:
            time.sleep(delay)

    if last_resp is not None:
        # 429 is a client error, so this always raises requests.HTTPError.
        last_resp.raise_for_status()
    raise last_exc  # type: ignore[misc]
|
||||
|
||||
def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]:
|
||||
|
||||
@@ -3,18 +3,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import asdict
|
||||
|
||||
from gitea_dashboard.collector import RepoData
|
||||
|
||||
# ASCII control characters (0x00-0x1F) except \t (0x09), \n (0x0A), \r (0x0D)
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _sanitize_control_chars(text: str) -> str:
    """Return *text* with ASCII control characters removed.

    Every character in 0x00-0x1F is dropped except \\t, \\n and \\r, which
    are kept as-is. Characters outside that range are left untouched.

    Such characters can come from Gitea repository descriptions and cause
    JSON errors ('Invalid control character').
    """
    cleaned = re.sub(_CONTROL_CHAR_RE, "", text)
    return cleaned
|
||||
|
||||
|
||||
def repos_to_dicts(repos: list[RepoData]) -> list[dict]:
    """Convert a list of RepoData into a list of serializable dicts.

    Each dict holds all the fields of its RepoData (via
    ``dataclasses.asdict``), ready for ``json.dumps()``. The top-level text
    fields ``name``, ``full_name`` and ``description`` are passed through
    ``_sanitize_control_chars`` when they hold a string; other fields and
    non-string values (e.g. ``None``) are left unchanged.

    Args:
        repos: The repositories to convert.

    Returns:
        One dict per input repo, in the same order.
    """
    result = []
    for repo in repos:
        record = asdict(repo)
        for field in ("name", "full_name", "description"):
            value = record.get(field)
            # Skip missing or non-string values instead of failing on them.
            if isinstance(value, str):
                record[field] = _sanitize_control_chars(value)
        result.append(record)
    return result
|
||||
|
||||
|
||||
def export_json(repos: list[RepoData], indent: int = 2) -> str:
|
||||
|
||||
Reference in New Issue
Block a user