From b40dea32f44b7deb9ecca32301c8b7c8d8e1dc45 Mon Sep 17 00:00:00 2001 From: sylvain Date: Thu, 12 Mar 2026 19:15:25 +0100 Subject: [PATCH] fix(client,exporter): handle HTTP 429 retry and sanitize JSON _get_with_retry now retries on HTTP 429 responses, respecting the Retry-After header when present. exporter sanitizes control characters (0x00-0x1F except \n \r \t) in text fields before JSON serialization. fixes #11 fixes #12 Co-Authored-By: Claude Opus 4.6 --- src/gitea_dashboard/client.py | 29 +++++++++++-- src/gitea_dashboard/exporter.py | 26 +++++++++-- tests/test_client.py | 77 +++++++++++++++++++++++++++++++++ tests/test_exporter.py | 37 ++++++++++++++++ 4 files changed, 163 insertions(+), 6 deletions(-) diff --git a/src/gitea_dashboard/client.py b/src/gitea_dashboard/client.py index aa3ab71..1249167 100644 --- a/src/gitea_dashboard/client.py +++ b/src/gitea_dashboard/client.py @@ -33,19 +33,42 @@ class GiteaClient: self.session.headers["Authorization"] = f"token {token}" def _get_with_retry(self, url: str, params: dict | None = None) -> requests.Response: - """GET avec retry automatique sur timeout. + """GET avec retry automatique sur timeout ET rate limiting (HTTP 429). Retente jusqu'a _MAX_RETRIES fois avec backoff lineaire (1s, 2s). - Leve requests.Timeout apres epuisement des retries. + Si la reponse HTTP est 429 (Too Many Requests), respecte le header + Retry-After (en secondes) pour le delai d'attente. Si Retry-After + est absent, utilise le backoff lineaire standard. + + Leve requests.Timeout apres epuisement des retries sur timeout. + Leve requests.HTTPError apres epuisement des retries sur 429. """ last_exc: requests.Timeout | None = None + last_resp: requests.Response | None = None for attempt in range(self._MAX_RETRIES + 1): try: - return self.session.get(url, params=params, timeout=self.timeout) + resp = self.session.get(url, params=params, timeout=self.timeout) except requests.Timeout as exc: last_exc = exc if attempt < self._MAX_RETRIES: time.sleep(self._RETRY_DELAY * (attempt + 1)) + continue + + if resp.status_code == 429: + last_resp = resp + if attempt < self._MAX_RETRIES: + retry_after = resp.headers.get("Retry-After") + if retry_after is not None: + delay = float(retry_after) + else: + delay = self._RETRY_DELAY * (attempt + 1) + time.sleep(delay) + continue + + return resp + + if last_resp is not None: + last_resp.raise_for_status() raise last_exc # type: ignore[misc] def _get_paginated(self, endpoint: str, params: dict | None = None) -> list[dict]: diff --git a/src/gitea_dashboard/exporter.py b/src/gitea_dashboard/exporter.py index 62719fe..abdf025 100644 --- a/src/gitea_dashboard/exporter.py +++ b/src/gitea_dashboard/exporter.py @@ -3,18 +3,38 @@ from __future__ import annotations import json +import re from dataclasses import asdict from gitea_dashboard.collector import RepoData +# Caracteres de controle ASCII (0x00-0x1F) sauf \t (0x09), \n (0x0A), \r (0x0D) +_CONTROL_CHAR_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]") + + +def _sanitize_control_chars(text: str) -> str: + """Supprime les caracteres de controle ASCII (0x00-0x1F) sauf \\n, \\r et \\t. + + Ces caracteres peuvent provenir de descriptions de repos Gitea + et causent des erreurs JSON ('Invalid control character'). + """ + return _CONTROL_CHAR_RE.sub("", text) + def repos_to_dicts(repos: list[RepoData]) -> list[dict]: """Convertit une liste de RepoData en liste de dicts serialisables. - Chaque dict contient toutes les donnees du RepoData, - pret pour json.dumps(). + Sanitize les champs texte (name, full_name, description) pour + supprimer les caracteres de controle invalides en JSON. """ - return [asdict(repo) for repo in repos] + result = [] + for repo in repos: + d = asdict(repo) + for field in ("name", "full_name", "description"): + if isinstance(d.get(field), str): + d[field] = _sanitize_control_chars(d[field]) + result.append(d) + return result def export_json(repos: list[RepoData], indent: int = 2) -> str: diff --git a/tests/test_client.py b/tests/test_client.py index 65f0b67..90a7201 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -210,6 +210,83 @@ class TestGetWithRetry: mock_sleep.assert_any_call(2.0) +class TestGetWithRetry429: + """Test _get_with_retry method (retry on HTTP 429 rate limiting).""" + + def _make_client(self): + return GiteaClient("http://gitea.local:3000", "tok") + + def _make_429_response(self, retry_after=None): + """Create a mock 429 response.""" + resp = MagicMock() + resp.status_code = 429 + resp.headers = {"Retry-After": retry_after} if retry_after is not None else {} + resp.raise_for_status.side_effect = requests.HTTPError( + "429 Too Many Requests", response=resp + ) + return resp + + def _make_200_response(self): + resp = MagicMock() + resp.status_code = 200 + return resp + + @patch("time.sleep") + def test_retry_on_429_with_retry_after(self, mock_sleep): + """429 with Retry-After header: sleeps for the indicated duration, then succeeds.""" + client = self._make_client() + resp_429 = self._make_429_response(retry_after="2") + resp_200 = self._make_200_response() + + with patch.object(client.session, "get", side_effect=[resp_429, resp_200]): + result = client._get_with_retry("http://gitea.local:3000/api/v1/test") + + assert result.status_code == 200 + mock_sleep.assert_called_once_with(2.0) + + @patch("time.sleep") + def test_retry_on_429_without_retry_after(self, mock_sleep): + """429 without Retry-After header: uses linear backoff (1.0s for first retry).""" + client = self._make_client() + resp_429 = self._make_429_response() + resp_200 = self._make_200_response() + + with patch.object(client.session, "get", side_effect=[resp_429, resp_200]): + result = client._get_with_retry("http://gitea.local:3000/api/v1/test") + + assert result.status_code == 200 + mock_sleep.assert_called_once_with(1.0) + + @patch("time.sleep") + def test_retry_on_429_exhausted(self, mock_sleep): + """3 consecutive 429 responses: raises HTTPError after exhausting retries.""" + client = self._make_client() + resp_429 = self._make_429_response() + + with patch.object(client.session, "get", return_value=resp_429): + with pytest.raises(requests.HTTPError): + client._get_with_retry("http://gitea.local:3000/api/v1/test") + + assert mock_sleep.call_count == 2 + + @patch("time.sleep") + def test_retry_on_429_then_timeout(self, mock_sleep): + """429 followed by Timeout: both retry types handled in same loop.""" + client = self._make_client() + resp_429 = self._make_429_response() + resp_200 = self._make_200_response() + + with patch.object( + client.session, + "get", + side_effect=[resp_429, requests.Timeout("timeout"), resp_200], + ): + result = client._get_with_retry("http://gitea.local:3000/api/v1/test") + + assert result.status_code == 200 + assert mock_sleep.call_count == 2 + + class TestGetLatestCommit: """Test get_latest_commit method.""" diff --git a/tests/test_exporter.py b/tests/test_exporter.py index 4b34704..506c0a7 100644 --- a/tests/test_exporter.py +++ b/tests/test_exporter.py @@ -56,6 +56,43 @@ class TestReposToDicts: assert field in d, f"Missing field: {field}" +class TestSanitizeControlChars: + """Test control character sanitization in export.""" + + def test_export_json_sanitizes_control_chars(self): + """Description with control chars (0x00, 0x01, 0x02) produces valid JSON without them.""" + repo = _make_repo(description="hello\x00\x01\x02world") + output = export_json([repo]) + + parsed = json.loads(output) + assert parsed[0]["description"] == "helloworld" + + def test_export_json_preserves_newlines_tabs(self): + """Newlines and tabs are preserved in JSON export (they are valid JSON escapes).""" + repo = _make_repo(description="line1\nline2\ttab") + output = export_json([repo]) + + parsed = json.loads(output) + assert parsed[0]["description"] == "line1\nline2\ttab" + + def test_export_json_unicode_safe(self): + """Description with emojis and accents produces valid JSON.""" + repo = _make_repo(description="Projet avec accents : e, a et emojis 🚀🎉") + output = export_json([repo]) + + parsed = json.loads(output) + assert "🚀" in parsed[0]["description"] + assert "accents" in parsed[0]["description"] + + def test_sanitize_name_and_full_name(self): + """Control chars in name and full_name fields are also sanitized.""" + repo = _make_repo(name="test\x00repo", full_name="admin/test\x01repo") + result = repos_to_dicts([repo]) + + assert result[0]["name"] == "testrepo" + assert result[0]["full_name"] == "admin/testrepo" + + class TestExportJson: """Test export_json function."""