fix(client,exporter): handle HTTP 429 retry and sanitize JSON

_get_with_retry now retries on HTTP 429 responses, respecting the
Retry-After header when present. exporter sanitizes control characters
(0x00-0x1F except \n \r \t) in text fields before JSON serialization.

fixes #11
fixes #12

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
sylvain
2026-03-12 19:15:25 +01:00
parent 9783389bfb
commit b40dea32f4
4 changed files with 163 additions and 6 deletions

View File

@@ -33,19 +33,42 @@ class GiteaClient:
self.session.headers["Authorization"] = f"token {token}"
def _get_with_retry(self, url: str, params: dict | None = None) -> requests.Response:
    """GET with automatic retry on timeout and on rate limiting (HTTP 429).

    Makes up to ``_MAX_RETRIES + 1`` attempts. Between attempts the method
    sleeps for a linear backoff of ``_RETRY_DELAY * (attempt + 1)`` seconds.
    For a 429 response, a ``Retry-After`` header holding a finite,
    non-negative number of seconds overrides that backoff. Any other
    ``Retry-After`` value (absent, HTTP-date form, malformed, negative,
    infinite) falls back to the linear backoff instead of raising.

    Args:
        url: URL to fetch.
        params: Optional query parameters forwarded to ``session.get``.

    Returns:
        The first response whose status code is not 429.

    Raises:
        requests.HTTPError: every attempt failed and at least one of them
            received a 429 response.
        requests.Timeout: every attempt timed out.
    """
    last_exc: requests.Timeout | None = None
    last_resp: requests.Response | None = None
    for attempt in range(self._MAX_RETRIES + 1):
        has_retries_left = attempt < self._MAX_RETRIES
        backoff = self._RETRY_DELAY * (attempt + 1)
        try:
            resp = self.session.get(url, params=params, timeout=self.timeout)
        except requests.Timeout as exc:
            last_exc = exc
            if has_retries_left:
                time.sleep(backoff)
            continue

        if resp.status_code != 429:
            return resp

        # Rate limited: remember the response so it can be raised if the
        # retries run out, then wait before the next attempt.
        last_resp = resp
        if has_retries_left:
            delay = backoff
            retry_after = resp.headers.get("Retry-After")
            if retry_after is not None:
                try:
                    seconds = float(retry_after)
                except (TypeError, ValueError):
                    # HTTP-date or malformed value: keep the linear backoff.
                    seconds = -1.0
                # Rejects negatives, NaN and infinity, which time.sleep()
                # would refuse or wait on forever.
                if 0 <= seconds < float("inf"):
                    delay = seconds
            time.sleep(delay)

    if last_resp is not None:
        last_resp.raise_for_status()
    raise last_exc  # type: ignore[misc]
def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]:

View File

@@ -3,18 +3,38 @@
from __future__ import annotations
import json
import re
from dataclasses import asdict
from gitea_dashboard.collector import RepoData
# ASCII control characters (0x00-0x1F) except \t (0x09), \n (0x0A), \r (0x0D)
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _sanitize_control_chars(text: str) -> str:
    """Strip ASCII control characters (0x00-0x1F) except \\n, \\r and \\t.

    Such characters can come from Gitea repository descriptions and
    cause JSON errors ('Invalid control character').
    """
    cleaned, _removed = _CONTROL_CHAR_RE.subn("", text)
    return cleaned
def repos_to_dicts(repos: list[RepoData]) -> list[dict]:
    """Convert a list of RepoData into a list of dicts meant for json.dumps().

    Each dict is the ``dataclasses.asdict`` form of one RepoData. The text
    fields ``name``, ``full_name`` and ``description`` are passed through
    ``_sanitize_control_chars`` when they hold a string; missing or
    non-string values are left untouched.

    Args:
        repos: Repositories to convert.

    Returns:
        One dict per repository, in the same order as ``repos``.
    """
    text_fields = ("name", "full_name", "description")
    result = []
    for repo in repos:
        record = asdict(repo)
        for key in text_fields:
            value = record.get(key)
            if isinstance(value, str):
                record[key] = _sanitize_control_chars(value)
        result.append(record)
    return result
def export_json(repos: list[RepoData], indent: int = 2) -> str: