deps_pypi/
registry.rs

1//! PyPI registry client.
2//!
3//! Provides access to the PyPI registry via:
4//! - Package metadata API (<https://pypi.org/pypi/{package}/json>) for version lookups
5//! - Simple API (<https://pypi.org/simple/{package}/>) for version index (future)
6//!
7//! All HTTP requests are cached aggressively using ETag/Last-Modified headers.
8
9use crate::error::{PypiError, Result};
10use crate::types::{PypiPackage, PypiVersion};
11use async_trait::async_trait;
12use deps_core::{HttpCache, PackageRegistry};
13use pep440_rs::{Version, VersionSpecifiers};
14use serde::Deserialize;
15use std::any::Any;
16use std::str::FromStr;
17use std::sync::Arc;
18
/// Base URL of the PyPI JSON API; package metadata is requested from
/// `{PYPI_BASE}/{package}/json`.
const PYPI_BASE: &str = "https://pypi.org/pypi";

/// Base URL for human-facing package pages on pypi.org
/// (`{PYPI_URL}/{package}`).
pub const PYPI_URL: &str = "https://pypi.org/project";
23
/// Normalize a package name in the spirit of PEP 503.
///
/// The name is lowercased and every run of `-`, `_` and `.` collapses into a
/// single hyphen. Separators at the very start or end of the name are dropped
/// entirely, so a name made only of separators normalizes to the empty string.
/// This gives consistent package lookups regardless of how the name was written.
///
/// # Examples
///
/// ```
/// # use deps_pypi::registry::normalize_package_name;
/// assert_eq!(normalize_package_name("Flask"), "flask");
/// assert_eq!(normalize_package_name("django_rest_framework"), "django-rest-framework");
/// assert_eq!(normalize_package_name("Pillow.Image"), "pillow-image");
/// assert_eq!(normalize_package_name("my__package"), "my-package");
/// ```
pub fn normalize_package_name(name: &str) -> String {
    // Lowercase the whole name up front so context-sensitive case mappings
    // see the complete string rather than isolated segments.
    let lowered = name.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());

    let segments = lowered
        .split(|c: char| matches!(c, '-' | '_' | '.'))
        .filter(|segment| !segment.is_empty());

    for segment in segments {
        if !normalized.is_empty() {
            normalized.push('-');
        }
        normalized.push_str(segment);
    }

    normalized
}
47
48/// Returns the URL for a package's page on pypi.org.
49///
50/// Package names are normalized and URL-encoded to prevent path traversal attacks.
51pub fn package_url(name: &str) -> String {
52    let normalized = normalize_package_name(name);
53    format!("{}/{}", PYPI_URL, urlencoding::encode(&normalized))
54}
55
/// Client for interacting with the PyPI registry.
///
/// Uses the PyPI JSON API for package metadata.
/// All requests are cached via the provided HttpCache.
///
/// The derived `Clone` only clones the `Arc` handle, so clones share the
/// same underlying cache.
///
/// # Examples
///
/// ```no_run
/// # use deps_pypi::PypiRegistry;
/// # use deps_core::HttpCache;
/// # use std::sync::Arc;
/// # #[tokio::main]
/// # async fn main() {
/// let cache = Arc::new(HttpCache::new());
/// let registry = PypiRegistry::new(cache);
///
/// let versions = registry.get_versions("requests").await.unwrap();
/// assert!(!versions.is_empty());
/// # }
/// ```
#[derive(Clone)]
pub struct PypiRegistry {
    /// Shared HTTP cache through which this client issues its requests.
    cache: Arc<HttpCache>,
}
80
81impl PypiRegistry {
82    /// Creates a new PyPI registry client with the given HTTP cache.
83    pub const fn new(cache: Arc<HttpCache>) -> Self {
84        Self { cache }
85    }
86
87    /// Fetches all versions for a package from PyPI.
88    ///
89    /// Returns versions sorted newest-first. Filters out yanked versions by default.
90    ///
91    /// # Errors
92    ///
93    /// Returns an error if:
94    /// - HTTP request fails
95    /// - Response body is invalid UTF-8
96    /// - JSON parsing fails
97    /// - Package does not exist
98    ///
99    /// # Examples
100    ///
101    /// ```no_run
102    /// # use deps_pypi::PypiRegistry;
103    /// # use deps_core::HttpCache;
104    /// # use std::sync::Arc;
105    /// # #[tokio::main]
106    /// # async fn main() {
107    /// let cache = Arc::new(HttpCache::new());
108    /// let registry = PypiRegistry::new(cache);
109    ///
110    /// let versions = registry.get_versions("flask").await.unwrap();
111    /// assert!(!versions.is_empty());
112    /// # }
113    /// ```
114    pub async fn get_versions(&self, name: &str) -> Result<Vec<PypiVersion>> {
115        let normalized = normalize_package_name(name);
116        let url = format!("{PYPI_BASE}/{normalized}/json");
117        let data = self.cache.get_cached(&url).await.map_err(|e| {
118            if e.to_string().contains("404") {
119                PypiError::PackageNotFound {
120                    package: name.to_string(),
121                }
122            } else {
123                PypiError::registry_error(name, e)
124            }
125        })?;
126
127        parse_package_metadata(name, &data)
128    }
129
130    /// Finds the latest version matching the given PEP 440 version specifier.
131    ///
132    /// Only returns non-yanked, non-prerelease versions by default.
133    ///
134    /// # Errors
135    ///
136    /// Returns an error if:
137    /// - HTTP request fails
138    /// - Package does not exist
139    /// - Version specifier is invalid
140    ///
141    /// # Examples
142    ///
143    /// ```no_run
144    /// # use deps_pypi::PypiRegistry;
145    /// # use deps_core::HttpCache;
146    /// # use std::sync::Arc;
147    /// # #[tokio::main]
148    /// # async fn main() {
149    /// let cache = Arc::new(HttpCache::new());
150    /// let registry = PypiRegistry::new(cache);
151    ///
152    /// let latest = registry.get_latest_matching("flask", ">=3.0,<4.0").await.unwrap();
153    /// assert!(latest.is_some());
154    /// # }
155    /// ```
156    pub async fn get_latest_matching(
157        &self,
158        name: &str,
159        req_str: &str,
160    ) -> Result<Option<PypiVersion>> {
161        let versions = self.get_versions(name).await?;
162
163        // PEP 440 uses empty string for "any version"
164        let normalized_req = if req_str == "*" { "" } else { req_str };
165
166        let specs = VersionSpecifiers::from_str(normalized_req).map_err(|e| {
167            PypiError::InvalidVersionSpecifier {
168                specifier: req_str.to_string(),
169                source: e,
170            }
171        })?;
172
173        Ok(versions.into_iter().find(|v| {
174            if let Ok(version) = Version::from_str(&v.version) {
175                specs.contains(&version) && !v.yanked && !v.is_prerelease()
176            } else {
177                false
178            }
179        }))
180    }
181
182    /// Searches for packages by name/keywords.
183    ///
184    /// Note: PyPI does not provide an official search API, so this returns
185    /// an empty result for now. Future implementation could use third-party
186    /// search services or scraping.
187    ///
188    /// # Errors
189    ///
190    /// Currently always returns Ok with empty vector.
191    ///
192    /// # Examples
193    ///
194    /// ```no_run
195    /// # use deps_pypi::PypiRegistry;
196    /// # use deps_core::HttpCache;
197    /// # use std::sync::Arc;
198    /// # #[tokio::main]
199    /// # async fn main() {
200    /// let cache = Arc::new(HttpCache::new());
201    /// let registry = PypiRegistry::new(cache);
202    ///
203    /// let results = registry.search("flask", 10).await.unwrap();
204    /// // Currently returns empty, to be implemented
205    /// # }
206    /// ```
207    pub async fn search(&self, _query: &str, _limit: usize) -> Result<Vec<PypiPackage>> {
208        // TODO: Implement search using third-party API or scraping
209        // PyPI deprecated their XML-RPC search API
210        Ok(Vec::new())
211    }
212
213    /// Fetches package metadata including description and project URLs.
214    ///
215    /// # Errors
216    ///
217    /// Returns an error if:
218    /// - HTTP request fails
219    /// - Package does not exist
220    /// - JSON parsing fails
221    pub async fn get_package_metadata(&self, name: &str) -> Result<PypiPackage> {
222        let normalized = normalize_package_name(name);
223        let url = format!("{PYPI_BASE}/{normalized}/json");
224        let data = self.cache.get_cached(&url).await.map_err(|e| {
225            if e.to_string().contains("404") {
226                PypiError::PackageNotFound {
227                    package: name.to_string(),
228                }
229            } else {
230                PypiError::registry_error(name, e)
231            }
232        })?;
233
234        parse_package_info(name, &data)
235    }
236}
237
238#[async_trait]
239impl PackageRegistry for PypiRegistry {
240    type Version = PypiVersion;
241    type Metadata = PypiPackage;
242    type VersionReq = String;
243
244    async fn get_versions(&self, name: &str) -> deps_core::error::Result<Vec<Self::Version>> {
245        Self::get_versions(self, name)
246            .await
247            .map_err(|e| deps_core::error::DepsError::CacheError(e.to_string()))
248    }
249
250    async fn get_latest_matching(
251        &self,
252        name: &str,
253        req: &Self::VersionReq,
254    ) -> deps_core::error::Result<Option<Self::Version>> {
255        Self::get_latest_matching(self, name, req)
256            .await
257            .map_err(|e| deps_core::error::DepsError::CacheError(e.to_string()))
258    }
259
260    async fn search(
261        &self,
262        query: &str,
263        limit: usize,
264    ) -> deps_core::error::Result<Vec<Self::Metadata>> {
265        Self::search(self, query, limit)
266            .await
267            .map_err(|e| deps_core::error::DepsError::CacheError(e.to_string()))
268    }
269}
270
271// Implement Registry trait for PypiRegistry
272#[async_trait]
273impl deps_core::Registry for PypiRegistry {
274    async fn get_versions(
275        &self,
276        name: &str,
277    ) -> deps_core::error::Result<Vec<Box<dyn deps_core::Version>>> {
278        let versions = Self::get_versions(self, name)
279            .await
280            .map_err(|e| deps_core::error::DepsError::CacheError(e.to_string()))?;
281        Ok(versions
282            .into_iter()
283            .map(|v| Box::new(v) as Box<dyn deps_core::Version>)
284            .collect())
285    }
286
287    async fn get_latest_matching(
288        &self,
289        name: &str,
290        req: &str,
291    ) -> deps_core::error::Result<Option<Box<dyn deps_core::Version>>> {
292        let version = Self::get_latest_matching(self, name, req)
293            .await
294            .map_err(|e| deps_core::error::DepsError::CacheError(e.to_string()))?;
295        Ok(version.map(|v| Box::new(v) as Box<dyn deps_core::Version>))
296    }
297
298    async fn search(
299        &self,
300        query: &str,
301        limit: usize,
302    ) -> deps_core::error::Result<Vec<Box<dyn deps_core::Metadata>>> {
303        let packages = Self::search(self, query, limit)
304            .await
305            .map_err(|e| deps_core::error::DepsError::CacheError(e.to_string()))?;
306        Ok(packages
307            .into_iter()
308            .map(|p| Box::new(p) as Box<dyn deps_core::Metadata>)
309            .collect())
310    }
311
312    fn package_url(&self, name: &str) -> String {
313        package_url(name)
314    }
315
316    fn as_any(&self) -> &dyn Any {
317        self
318    }
319}
320
321// JSON response types
322
/// Top-level shape of the PyPI JSON API document, limited to the fields this
/// module reads. Both fields are required for deserialization to succeed.
#[derive(Debug, Deserialize)]
struct PypiResponse {
    /// Package-level information (name, summary, links, current version).
    info: PypiInfo,
    /// Map from version string to that version's release file entries.
    releases: std::collections::HashMap<String, Vec<PypiRelease>>,
}
328
/// The `info` object of the JSON API document.
#[derive(Debug, Deserialize)]
struct PypiInfo {
    /// Package name as reported by the registry.
    name: String,
    /// Short description; may be absent or `null`.
    summary: Option<String>,
    /// Labelled project links; may be absent or `null`.
    project_urls: Option<std::collections::HashMap<String, String>>,
    /// Version string reported in `info`; exposed as the package's latest version.
    version: String,
}
336
/// One release file entry under `releases[version]`; only the yank flag is read.
#[derive(Debug, Deserialize)]
struct PypiRelease {
    /// Whether this file was yanked; a missing or `null` value is treated as
    /// not yanked by the parser.
    yanked: Option<bool>,
}
341
342/// Parse package metadata from PyPI JSON response.
343fn parse_package_metadata(package_name: &str, data: &[u8]) -> Result<Vec<PypiVersion>> {
344    let response: PypiResponse =
345        serde_json::from_slice(data).map_err(|e| PypiError::api_response_error(package_name, e))?;
346
347    // Parse versions once and cache with the parsed Version for sorting
348    let mut versions_with_parsed: Vec<(PypiVersion, Version)> = response
349        .releases
350        .into_iter()
351        .filter_map(|(version_str, releases)| {
352            // Check if any release file is yanked
353            let yanked = releases.iter().any(|r| r.yanked.unwrap_or(false));
354
355            // Parse version to validate it's a valid PEP 440 version
356            Version::from_str(&version_str).ok().map(|parsed| {
357                (
358                    PypiVersion {
359                        version: version_str,
360                        yanked,
361                    },
362                    parsed,
363                )
364            })
365        })
366        .collect();
367
368    // Sort by version (newest first) using pre-parsed versions
369    versions_with_parsed.sort_by(|a, b| b.1.cmp(&a.1));
370
371    // Extract sorted versions, discarding parsed data
372    let versions: Vec<PypiVersion> = versions_with_parsed.into_iter().map(|(v, _)| v).collect();
373
374    Ok(versions)
375}
376
377/// Parse package info from PyPI JSON response.
378fn parse_package_info(package_name: &str, data: &[u8]) -> Result<PypiPackage> {
379    let response: PypiResponse =
380        serde_json::from_slice(data).map_err(|e| PypiError::api_response_error(package_name, e))?;
381
382    let project_urls = response
383        .info
384        .project_urls
385        .unwrap_or_default()
386        .into_iter()
387        .collect();
388
389    Ok(PypiPackage {
390        name: response.info.name,
391        summary: response.info.summary,
392        project_urls,
393        latest_version: response.info.version,
394    })
395}
396
/// Unit tests for name normalization, URL building and JSON parsing.
/// None of them perform network I/O.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_package_name_lowercase() {
        assert_eq!(normalize_package_name("Flask"), "flask");
        assert_eq!(normalize_package_name("DJANGO"), "django");
        assert_eq!(normalize_package_name("Requests"), "requests");
    }

    #[test]
    fn test_normalize_package_name_underscores() {
        assert_eq!(
            normalize_package_name("django_rest_framework"),
            "django-rest-framework"
        );
        assert_eq!(normalize_package_name("my_package"), "my-package");
    }

    #[test]
    fn test_normalize_package_name_dots() {
        assert_eq!(normalize_package_name("Pillow.Image"), "pillow-image");
        assert_eq!(normalize_package_name("zope.interface"), "zope-interface");
    }

    #[test]
    fn test_normalize_package_name_consecutive_separators() {
        assert_eq!(normalize_package_name("my__package"), "my-package");
        assert_eq!(normalize_package_name("my..package"), "my-package");
        assert_eq!(normalize_package_name("my_.package"), "my-package");
    }

    #[test]
    fn test_normalize_package_name_mixed() {
        assert_eq!(normalize_package_name("My_Package.Name"), "my-package-name");
        assert_eq!(
            normalize_package_name("SOME__Weird.._Package"),
            "some-weird-package"
        );
    }

    #[test]
    fn test_normalize_package_name_already_normalized() {
        assert_eq!(normalize_package_name("my-package"), "my-package");
        assert_eq!(
            normalize_package_name("django-rest-framework"),
            "django-rest-framework"
        );
    }

    #[test]
    fn test_normalize_package_name_edge_cases() {
        assert_eq!(normalize_package_name("a"), "a");
        assert_eq!(normalize_package_name("A_B_C"), "a-b-c");
        assert_eq!(normalize_package_name("---"), "");
        assert_eq!(normalize_package_name(""), "");
    }

    #[test]
    fn test_normalize_package_name_leading_trailing_separators() {
        assert_eq!(normalize_package_name("_package_"), "package");
        assert_eq!(normalize_package_name(".package."), "package");
        assert_eq!(normalize_package_name("__package__"), "package");
    }

    #[test]
    fn test_package_url() {
        assert_eq!(package_url("requests"), "https://pypi.org/project/requests");
        assert_eq!(package_url("flask"), "https://pypi.org/project/flask");
    }

    #[test]
    fn test_package_url_normalization() {
        assert_eq!(package_url("Flask"), "https://pypi.org/project/flask");
        assert_eq!(
            package_url("django_rest_framework"),
            "https://pypi.org/project/django-rest-framework"
        );
    }

    #[test]
    fn test_package_url_encoding() {
        // Names made of unreserved characters pass through unchanged.
        assert_eq!(
            package_url("my-package"),
            "https://pypi.org/project/my-package"
        );

        // Characters that could alter the URL structure must be percent-encoded.
        assert_eq!(package_url("foo/bar"), "https://pypi.org/project/foo%2Fbar");
        assert_eq!(package_url("foo bar"), "https://pypi.org/project/foo%20bar");
        assert_eq!(package_url("foo?x"), "https://pypi.org/project/foo%3Fx");
    }

    #[test]
    fn test_parse_package_metadata() {
        let json = r#"{
            "info": {
                "name": "requests",
                "summary": "Python HTTP for Humans.",
                "version": "2.28.2",
                "project_urls": {
                    "Homepage": "https://requests.readthedocs.io"
                }
            },
            "releases": {
                "2.28.2": [{"yanked": false}],
                "2.28.1": [{"yanked": false}],
                "2.28.0": [{"yanked": true}],
                "2.27.0": [{"yanked": false}]
            }
        }"#;

        let versions = parse_package_metadata("requests", json.as_bytes()).unwrap();

        // Newest-first ordering, with yanked releases kept and flagged.
        let order: Vec<&str> = versions.iter().map(|v| v.version.as_str()).collect();
        assert_eq!(order, ["2.28.2", "2.28.1", "2.28.0", "2.27.0"]);

        let yanked: Vec<bool> = versions.iter().map(|v| v.yanked).collect();
        assert_eq!(yanked, [false, false, true, false]);
    }

    #[test]
    fn test_parse_package_metadata_skips_invalid_versions() {
        let json = r#"{
            "info": {
                "name": "test",
                "version": "1.0.0"
            },
            "releases": {
                "1.0.0": [{"yanked": false}],
                "not-a-version": [{"yanked": false}]
            }
        }"#;

        let versions = parse_package_metadata("test", json.as_bytes()).unwrap();

        assert_eq!(versions.len(), 1);
        assert_eq!(versions[0].version, "1.0.0");
    }

    #[test]
    fn test_parse_package_metadata_missing_yanked_flag() {
        // A file entry without `yanked`, or a release without files, is not yanked.
        let json = r#"{
            "info": {
                "name": "test",
                "version": "1.1.0"
            },
            "releases": {
                "1.1.0": [{}],
                "1.0.0": []
            }
        }"#;

        let versions = parse_package_metadata("test", json.as_bytes()).unwrap();

        assert_eq!(versions.len(), 2);
        assert!(versions.iter().all(|v| !v.yanked));
    }

    #[test]
    fn test_parse_rejects_malformed_json() {
        assert!(parse_package_metadata("broken", b"not json").is_err());
        assert!(parse_package_info("broken", b"not json").is_err());
    }

    #[test]
    fn test_parse_package_info() {
        let json = r#"{
            "info": {
                "name": "flask",
                "summary": "A micro web framework",
                "version": "3.0.0",
                "project_urls": {
                    "Documentation": "https://flask.palletsprojects.com/",
                    "Repository": "https://github.com/pallets/flask"
                }
            },
            "releases": {}
        }"#;

        let pkg = parse_package_info("flask", json.as_bytes()).unwrap();

        assert_eq!(pkg.name, "flask");
        assert_eq!(pkg.summary, Some("A micro web framework".to_string()));
        assert_eq!(pkg.latest_version, "3.0.0");
        assert_eq!(pkg.project_urls.len(), 2);
    }

    #[test]
    fn test_wildcard_specifier_normalization() {
        // "*" is not a PEP 440 specifier, which is why get_latest_matching
        // rewrites it to the empty specifier set before parsing.
        assert!(VersionSpecifiers::from_str("*").is_err());

        // Verify that empty string is valid PEP 440 (matches any version)
        let specs = VersionSpecifiers::from_str("").unwrap();
        assert!(specs.contains(&Version::from_str("1.0.0").unwrap()));
        assert!(specs.contains(&Version::from_str("2.5.3").unwrap()));
        assert!(specs.contains(&Version::from_str("0.0.1").unwrap()));
    }

    #[test]
    fn test_prerelease_detection() {
        let json = r#"{
            "info": {
                "name": "test",
                "version": "1.0.0",
                "project_urls": null
            },
            "releases": {
                "1.0.0": [{"yanked": false}],
                "1.0.0a1": [{"yanked": false}],
                "1.0.0b2": [{"yanked": false}],
                "1.0.0rc1": [{"yanked": false}]
            }
        }"#;

        let versions = parse_package_metadata("test", json.as_bytes()).unwrap();

        let stable: Vec<_> = versions.iter().filter(|v| !v.is_prerelease()).collect();
        let prerelease: Vec<_> = versions.iter().filter(|v| v.is_prerelease()).collect();

        assert_eq!(stable.len(), 1);
        assert_eq!(prerelease.len(), 3);
    }
}