deps_go/
registry.rs

1//! proxy.golang.org registry client.
2//!
3//! Provides access to Go module proxy via:
4//! - `/{module}/@v/list` - list all versions
5//! - `/{module}/@v/{version}.info` - version metadata
6//! - `/{module}/@v/{version}.mod` - go.mod file
7//! - `/{module}/@latest` - latest version info
8//!
9//! All HTTP requests are cached aggressively using ETag/Last-Modified headers.
10//!
11//! # Examples
12//!
13//! ```no_run
14//! use deps_go::GoRegistry;
15//! use deps_core::HttpCache;
16//! use std::sync::Arc;
17//!
18//! #[tokio::main]
19//! async fn main() {
20//!     let cache = Arc::new(HttpCache::new());
21//!     let registry = GoRegistry::new(cache);
22//!
23//!     let versions = registry.get_versions("github.com/gin-gonic/gin").await.unwrap();
24//!     println!("Latest gin: {}", versions[0].version);
25//! }
26//! ```
27
28use crate::error::{GoError, Result};
29use crate::types::GoVersion;
30use crate::version::{escape_module_path, is_pseudo_version};
31use deps_core::HttpCache;
32use serde::Deserialize;
33use std::any::Any;
34use std::sync::Arc;
35
/// Base URL of the Go module proxy; every request URL built in this module starts with it.
const PROXY_BASE: &str = "https://proxy.golang.org";

/// Base URL for Go package documentation pages (pkg.go.dev).
pub const PKG_GO_DEV_URL: &str = "https://pkg.go.dev";

/// Maximum allowed module path length to prevent DoS.
///
/// The limit is compared against `str::len`, so it counts bytes.
const MAX_MODULE_PATH_LENGTH: usize = 500;

/// Maximum allowed version string length.
///
/// The limit is compared against `str::len`, so it counts bytes.
const MAX_VERSION_LENGTH: usize = 128;
46
47/// Validates a module path for length and basic format.
48///
49/// # Errors
50///
51/// Returns error if:
52/// - Path is empty
53/// - Path exceeds MAX_MODULE_PATH_LENGTH
54fn validate_module_path(module_path: &str) -> Result<()> {
55    if module_path.is_empty() {
56        return Err(GoError::InvalidModulePath("module path is empty".into()));
57    }
58
59    if module_path.len() > MAX_MODULE_PATH_LENGTH {
60        return Err(GoError::InvalidModulePath(format!(
61            "module path exceeds maximum length of {MAX_MODULE_PATH_LENGTH} characters"
62        )));
63    }
64
65    Ok(())
66}
67
68/// Validates a version string for length and basic format.
69///
70/// # Errors
71///
72/// Returns error if:
73/// - Version is empty
74/// - Version exceeds MAX_VERSION_LENGTH
75/// - Version contains path traversal sequences
76fn validate_version_string(version: &str) -> Result<()> {
77    if version.is_empty() {
78        return Err(GoError::InvalidVersionSpecifier {
79            specifier: version.to_string(),
80            message: "version string is empty".into(),
81        });
82    }
83
84    if version.len() > MAX_VERSION_LENGTH {
85        return Err(GoError::InvalidVersionSpecifier {
86            specifier: version.to_string(),
87            message: format!(
88                "version string exceeds maximum length of {MAX_VERSION_LENGTH} characters"
89            ),
90        });
91    }
92
93    // Check for path traversal attempts
94    if version.contains("..") || version.contains('/') || version.contains('\\') {
95        return Err(GoError::InvalidVersionSpecifier {
96            specifier: version.to_string(),
97            message: "version string contains invalid characters".into(),
98        });
99    }
100
101    Ok(())
102}
103
104/// Returns the URL for a module's documentation page on pkg.go.dev.
105pub fn package_url(module_path: &str) -> String {
106    format!("{PKG_GO_DEV_URL}/{module_path}")
107}
108
109/// Client for interacting with proxy.golang.org.
110///
111/// Uses the Go module proxy protocol for version lookups and metadata.
112/// All requests are cached via the provided HttpCache.
113#[derive(Clone)]
114pub struct GoRegistry {
115    cache: Arc<HttpCache>,
116}
117
118impl GoRegistry {
119    /// Creates a new Go registry client with the given HTTP cache.
120    pub const fn new(cache: Arc<HttpCache>) -> Self {
121        Self { cache }
122    }
123
124    /// Fetches all versions for a module from the `/@v/list` endpoint.
125    ///
126    /// Returns versions in registry order (not sorted). Includes pseudo-versions.
127    ///
128    /// # Errors
129    ///
130    /// Returns an error if:
131    /// - HTTP request fails
132    /// - Response body is invalid UTF-8
133    /// - Module does not exist (404)
134    /// - Module path is invalid or too long
135    ///
136    /// # Examples
137    ///
138    /// ```no_run
139    /// # use deps_go::GoRegistry;
140    /// # use deps_core::HttpCache;
141    /// # use std::sync::Arc;
142    /// # #[tokio::main]
143    /// # async fn main() {
144    /// let cache = Arc::new(HttpCache::new());
145    /// let registry = GoRegistry::new(cache);
146    ///
147    /// let versions = registry.get_versions("github.com/gin-gonic/gin").await.unwrap();
148    /// assert!(!versions.is_empty());
149    /// # }
150    /// ```
151    pub async fn get_versions(&self, module_path: &str) -> Result<Vec<GoVersion>> {
152        validate_module_path(module_path)?;
153
154        let escaped = escape_module_path(module_path);
155        let url = format!("{PROXY_BASE}/{escaped}/@v/list");
156
157        let data = self
158            .cache
159            .get_cached(&url)
160            .await
161            .map_err(|e| GoError::RegistryError {
162                module: module_path.to_string(),
163                source: Box::new(e),
164            })?;
165
166        parse_version_list(&data)
167    }
168
169    /// Fetches version metadata from the `/@v/{version}.info` endpoint.
170    ///
171    /// Returns version with timestamp information.
172    ///
173    /// # Errors
174    ///
175    /// Returns an error if:
176    /// - HTTP request fails
177    /// - JSON parsing fails
178    /// - Module path or version string is invalid
179    ///
180    /// # Examples
181    ///
182    /// ```no_run
183    /// # use deps_go::GoRegistry;
184    /// # use deps_core::HttpCache;
185    /// # use std::sync::Arc;
186    /// # #[tokio::main]
187    /// # async fn main() {
188    /// let cache = Arc::new(HttpCache::new());
189    /// let registry = GoRegistry::new(cache);
190    ///
191    /// let info = registry.get_version_info("github.com/gin-gonic/gin", "v1.9.1").await.unwrap();
192    /// assert_eq!(info.version, "v1.9.1");
193    /// # }
194    /// ```
195    pub async fn get_version_info(&self, module_path: &str, version: &str) -> Result<GoVersion> {
196        validate_module_path(module_path)?;
197        validate_version_string(version)?;
198
199        let escaped = escape_module_path(module_path);
200        let url = format!("{PROXY_BASE}/{escaped}/@v/{version}.info");
201
202        let data = self
203            .cache
204            .get_cached(&url)
205            .await
206            .map_err(|e| GoError::RegistryError {
207                module: module_path.to_string(),
208                source: Box::new(e),
209            })?;
210
211        parse_version_info(&data)
212    }
213
214    /// Fetches latest version using the `/@latest` endpoint.
215    ///
216    /// Returns the latest stable version (non-pseudo).
217    ///
218    /// # Errors
219    ///
220    /// Returns an error if:
221    /// - HTTP request fails
222    /// - JSON parsing fails
223    /// - Module path is invalid
224    ///
225    /// # Examples
226    ///
227    /// ```no_run
228    /// # use deps_go::GoRegistry;
229    /// # use deps_core::HttpCache;
230    /// # use std::sync::Arc;
231    /// # #[tokio::main]
232    /// # async fn main() {
233    /// let cache = Arc::new(HttpCache::new());
234    /// let registry = GoRegistry::new(cache);
235    ///
236    /// let latest = registry.get_latest("github.com/gin-gonic/gin").await.unwrap();
237    /// assert!(!latest.is_pseudo);
238    /// # }
239    /// ```
240    pub async fn get_latest(&self, module_path: &str) -> Result<GoVersion> {
241        validate_module_path(module_path)?;
242
243        let escaped = escape_module_path(module_path);
244        let url = format!("{PROXY_BASE}/{escaped}/@latest");
245
246        let data = self
247            .cache
248            .get_cached(&url)
249            .await
250            .map_err(|e| GoError::RegistryError {
251                module: module_path.to_string(),
252                source: Box::new(e),
253            })?;
254
255        parse_version_info(&data)
256    }
257
258    /// Fetches the go.mod file for a specific version.
259    ///
260    /// Returns the raw content of the go.mod file.
261    ///
262    /// # Errors
263    ///
264    /// Returns an error if:
265    /// - HTTP request fails
266    /// - Response body is invalid UTF-8
267    /// - Module path or version string is invalid
268    ///
269    /// # Examples
270    ///
271    /// ```no_run
272    /// # use deps_go::GoRegistry;
273    /// # use deps_core::HttpCache;
274    /// # use std::sync::Arc;
275    /// # #[tokio::main]
276    /// # async fn main() {
277    /// let cache = Arc::new(HttpCache::new());
278    /// let registry = GoRegistry::new(cache);
279    ///
280    /// let go_mod = registry.get_go_mod("github.com/gin-gonic/gin", "v1.9.1").await.unwrap();
281    /// assert!(go_mod.contains("module github.com/gin-gonic/gin"));
282    /// # }
283    /// ```
284    pub async fn get_go_mod(&self, module_path: &str, version: &str) -> Result<String> {
285        validate_module_path(module_path)?;
286        validate_version_string(version)?;
287
288        let escaped = escape_module_path(module_path);
289        let url = format!("{PROXY_BASE}/{escaped}/@v/{version}.mod");
290
291        let data = self
292            .cache
293            .get_cached(&url)
294            .await
295            .map_err(|e| GoError::RegistryError {
296                module: module_path.to_string(),
297                source: Box::new(e),
298            })?;
299
300        std::str::from_utf8(&data)
301            .map(std::string::ToString::to_string)
302            .map_err(|e| GoError::CacheError(format!("Invalid UTF-8 in go.mod: {e}")))
303    }
304}
305
306/// Version info response from proxy.golang.org.
307#[derive(Deserialize)]
308struct VersionInfo {
309    #[serde(rename = "Version")]
310    version: String,
311    #[serde(rename = "Time")]
312    time: String,
313}
314
315/// Parses newline-separated version list from `/@v/list` endpoint.
316///
317/// Versions are sorted in descending order (newest first) to ensure
318/// `find_latest_stable` returns the correct latest version.
319fn parse_version_list(data: &[u8]) -> Result<Vec<GoVersion>> {
320    let content = std::str::from_utf8(data).map_err(|e| GoError::InvalidVersionSpecifier {
321        specifier: String::new(),
322        message: format!("Invalid UTF-8 in version list response: {e}"),
323    })?;
324
325    // Parse versions with precomputed sort keys (Schwartzian transform)
326    // This avoids repeated regex/semver parsing during sort comparisons
327    let mut versions_with_keys: Vec<(GoVersion, Option<semver::Version>)> = content
328        .lines()
329        .filter(|line| !line.trim().is_empty())
330        .map(|line| {
331            let is_pseudo = is_pseudo_version(line);
332            let sort_key = parse_sort_key(line, is_pseudo);
333            let version = GoVersion {
334                version: line.to_string(),
335                time: None,
336                is_pseudo,
337                retracted: false,
338            };
339            (version, sort_key)
340        })
341        .collect();
342
343    // Sort by precomputed keys (descending - newest first)
344    versions_with_keys.sort_by(|a, b| match (&b.1, &a.1) {
345        (Some(v1), Some(v2)) => v1.cmp(v2),
346        (Some(_), None) => std::cmp::Ordering::Less,
347        (None, Some(_)) => std::cmp::Ordering::Greater,
348        (None, None) => b.0.version.cmp(&a.0.version),
349    });
350
351    Ok(versions_with_keys.into_iter().map(|(v, _)| v).collect())
352}
353
354/// Parses a version string into a semver::Version for sorting.
355/// Uses precomputed is_pseudo flag to avoid regex during sort.
356fn parse_sort_key(version: &str, is_pseudo: bool) -> Option<semver::Version> {
357    use crate::version::base_version_from_pseudo;
358
359    let clean = version.trim_start_matches('v').replace("+incompatible", "");
360    let cmp_str = if is_pseudo {
361        base_version_from_pseudo(version).unwrap_or(clean)
362    } else {
363        clean
364    };
365
366    // Parse only the X.Y.Z part, ignoring prerelease suffix
367    let base = cmp_str.split('-').next().unwrap_or(&cmp_str);
368    semver::Version::parse(base.trim_start_matches('v')).ok()
369}
370
371/// Parses JSON version info from `/@v/{version}.info` or `/@latest` endpoint.
372fn parse_version_info(data: &[u8]) -> Result<GoVersion> {
373    let info: VersionInfo =
374        serde_json::from_slice(data).map_err(|e| GoError::ApiResponseError {
375            module: String::new(),
376            source: e,
377        })?;
378
379    let is_pseudo = is_pseudo_version(&info.version);
380    Ok(GoVersion {
381        version: info.version,
382        time: Some(info.time),
383        is_pseudo,
384        retracted: false,
385    })
386}
387
388// Implement deps_core::Registry trait for trait object support
389#[async_trait::async_trait]
390impl deps_core::Registry for GoRegistry {
391    async fn get_versions(
392        &self,
393        name: &str,
394    ) -> deps_core::Result<Vec<Box<dyn deps_core::Version>>> {
395        let versions = self.get_versions(name).await?;
396        Ok(versions
397            .into_iter()
398            .map(|v| Box::new(v) as Box<dyn deps_core::Version>)
399            .collect())
400    }
401
402    async fn get_latest_matching(
403        &self,
404        name: &str,
405        _req: &str,
406    ) -> deps_core::Result<Option<Box<dyn deps_core::Version>>> {
407        // Try /@latest first (fast path)
408        if let Ok(version) = self.get_latest(name).await {
409            return Ok(Some(Box::new(version) as Box<dyn deps_core::Version>));
410        }
411        // Fallback to /@v/list (/@latest is optional per Go proxy spec)
412        let versions = self.get_versions(name).await?;
413        let latest = versions.into_iter().find(|v| !v.is_pseudo && !v.retracted);
414        Ok(latest.map(|v| Box::new(v) as Box<dyn deps_core::Version>))
415    }
416
417    async fn search(
418        &self,
419        _query: &str,
420        _limit: usize,
421    ) -> deps_core::Result<Vec<Box<dyn deps_core::Metadata>>> {
422        // proxy.golang.org doesn't support search
423        // Could integrate with pkg.go.dev API in future
424        Ok(vec![])
425    }
426
427    fn package_url(&self, name: &str) -> String {
428        package_url(name)
429    }
430
431    fn as_any(&self) -> &dyn Any {
432        self
433    }
434}
435
/// Unit tests for the parsers and validators, plus `#[ignore]`d tests that talk to the
/// real proxy.golang.org and must be run explicitly.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_version_list() {
        let data = b"v1.0.0\nv1.0.1\nv1.1.0\nv2.0.0\n";

        let versions = parse_version_list(data).unwrap();
        assert_eq!(versions.len(), 4);
        // Sorted descending (newest first)
        assert_eq!(versions[0].version, "v2.0.0");
        assert_eq!(versions[1].version, "v1.1.0");
        assert_eq!(versions[2].version, "v1.0.1");
        assert_eq!(versions[3].version, "v1.0.0");
        assert!(!versions[0].is_pseudo);
    }

    #[test]
    fn test_parse_version_list_with_pseudo() {
        let data = b"v1.0.0\nv0.0.0-20191109021931-daa7c04131f5\nv1.1.0\n";

        let versions = parse_version_list(data).unwrap();
        assert_eq!(versions.len(), 3);
        // Sorted descending: v1.1.0, v1.0.0, v0.0.0-... (pseudo based on v0.0.0)
        assert_eq!(versions[0].version, "v1.1.0");
        assert!(!versions[0].is_pseudo);
        assert_eq!(versions[1].version, "v1.0.0");
        assert!(!versions[1].is_pseudo);
        assert!(versions[2].is_pseudo);
    }

    #[test]
    fn test_parse_version_list_empty() {
        let data = b"";
        let versions = parse_version_list(data).unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_version_list_blank_lines() {
        let data = b"\n\n\n";
        let versions = parse_version_list(data).unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_version_info() {
        let json = r#"{"Version":"v1.9.1","Time":"2023-07-18T14:30:00Z"}"#;
        let version = parse_version_info(json.as_bytes()).unwrap();
        assert_eq!(version.version, "v1.9.1");
        assert_eq!(version.time, Some("2023-07-18T14:30:00Z".into()));
        assert!(!version.is_pseudo);
    }

    #[test]
    fn test_parse_version_info_pseudo() {
        let json =
            r#"{"Version":"v0.0.0-20191109021931-daa7c04131f5","Time":"2019-11-09T02:19:31Z"}"#;
        let version = parse_version_info(json.as_bytes()).unwrap();
        assert_eq!(version.version, "v0.0.0-20191109021931-daa7c04131f5");
        assert!(version.is_pseudo);
    }

    #[test]
    fn test_parse_version_info_invalid_json() {
        let json = b"not json";
        let result = parse_version_info(json);
        assert!(result.is_err());
    }

    #[test]
    fn test_package_url() {
        assert_eq!(
            package_url("github.com/gin-gonic/gin"),
            "https://pkg.go.dev/github.com/gin-gonic/gin"
        );
        assert_eq!(
            package_url("golang.org/x/crypto"),
            "https://pkg.go.dev/golang.org/x/crypto"
        );
    }

    #[tokio::test]
    async fn test_registry_creation() {
        let cache = Arc::new(HttpCache::new());
        let _registry = GoRegistry::new(cache);
    }

    #[tokio::test]
    async fn test_registry_clone() {
        let cache = Arc::new(HttpCache::new());
        let registry = GoRegistry::new(cache);
        let cloned = registry.clone();
        // A clone must share the original's cache rather than create a second one.
        assert!(Arc::ptr_eq(&registry.cache, &cloned.cache));
    }

    // The tests below hit the real proxy and are ignored by default.

    #[tokio::test]
    #[ignore]
    async fn test_fetch_real_gin_versions() {
        let cache = Arc::new(HttpCache::new());
        let registry = GoRegistry::new(cache);
        let versions = registry
            .get_versions("github.com/gin-gonic/gin")
            .await
            .unwrap();

        assert!(!versions.is_empty());
        assert!(versions.iter().any(|v| v.version.starts_with("v1.")));
    }

    #[tokio::test]
    #[ignore]
    async fn test_fetch_real_version_info() {
        let cache = Arc::new(HttpCache::new());
        let registry = GoRegistry::new(cache);
        let info = registry
            .get_version_info("github.com/gin-gonic/gin", "v1.9.1")
            .await
            .unwrap();

        assert_eq!(info.version, "v1.9.1");
        assert!(info.time.is_some());
    }

    #[tokio::test]
    #[ignore]
    async fn test_fetch_real_latest() {
        let cache = Arc::new(HttpCache::new());
        let registry = GoRegistry::new(cache);
        let latest = registry
            .get_latest("github.com/gin-gonic/gin")
            .await
            .unwrap();

        assert!(latest.version.starts_with('v'));
        assert!(!latest.is_pseudo);
    }

    #[tokio::test]
    #[ignore]
    async fn test_fetch_real_go_mod() {
        let cache = Arc::new(HttpCache::new());
        let registry = GoRegistry::new(cache);
        let go_mod = registry
            .get_go_mod("github.com/gin-gonic/gin", "v1.9.1")
            .await
            .unwrap();

        assert!(go_mod.contains("module github.com/gin-gonic/gin"));
    }

    #[tokio::test]
    #[ignore]
    async fn test_module_not_found() {
        let cache = Arc::new(HttpCache::new());
        let registry = GoRegistry::new(cache);
        let result = registry
            .get_versions("github.com/nonexistent/module12345")
            .await;
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_version_list_mixed_stable_and_pseudo() {
        let data = b"v1.0.0\nv1.1.0-0.20200101000000-abcdefabcdef\nv1.2.0\nv1.2.1-beta.1\n";
        let versions = parse_version_list(data).unwrap();
        assert_eq!(versions.len(), 4);
        // Sorted descending: v1.2.1-beta.1, v1.2.0, v1.1.0-0...(pseudo), v1.0.0
        assert_eq!(versions[0].version, "v1.2.1-beta.1");
        assert!(!versions[0].is_pseudo); // prerelease, not pseudo
        assert_eq!(versions[1].version, "v1.2.0");
        assert!(!versions[1].is_pseudo);
        assert!(versions[2].is_pseudo); // pseudo-version based on v1.1.0
        assert_eq!(versions[3].version, "v1.0.0");
        assert!(!versions[3].is_pseudo);
    }

    #[test]
    fn test_parse_version_list_invalid_utf8() {
        let data = &[0xFF, 0xFE, 0xFD]; // Invalid UTF-8
        let result = parse_version_list(data);
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_version_info_missing_fields() {
        let json = r#"{"Version":"v1.0.0"}"#; // Missing Time field
        let result = parse_version_info(json.as_bytes());
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_module_path_empty() {
        let result = validate_module_path("");
        assert!(result.is_err());
        assert!(matches!(result, Err(GoError::InvalidModulePath(_))));
    }

    #[test]
    fn test_validate_module_path_too_long() {
        let long_path = "a".repeat(MAX_MODULE_PATH_LENGTH + 1);
        let result = validate_module_path(&long_path);
        assert!(result.is_err());
        assert!(matches!(result, Err(GoError::InvalidModulePath(_))));
    }

    #[test]
    fn test_validate_module_path_valid() {
        let result = validate_module_path("github.com/user/repo");
        assert!(result.is_ok());
    }

    #[test]
    fn test_validate_version_string_empty() {
        let result = validate_version_string("");
        assert!(result.is_err());
        assert!(matches!(
            result,
            Err(GoError::InvalidVersionSpecifier { .. })
        ));
    }

    #[test]
    fn test_validate_version_string_too_long() {
        let long_version = "v".to_string() + &"1".repeat(MAX_VERSION_LENGTH);
        let result = validate_version_string(&long_version);
        assert!(result.is_err());
        assert!(matches!(
            result,
            Err(GoError::InvalidVersionSpecifier { .. })
        ));
    }

    #[test]
    fn test_validate_version_string_path_traversal() {
        let result = validate_version_string("v1.0.0/../etc/passwd");
        assert!(result.is_err());
        assert!(matches!(
            result,
            Err(GoError::InvalidVersionSpecifier { .. })
        ));
    }

    #[test]
    fn test_validate_version_string_slashes() {
        let result = validate_version_string("v1.0.0/malicious");
        assert!(result.is_err());

        let result = validate_version_string("v1.0.0\\malicious");
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_version_string_valid() {
        let result = validate_version_string("v1.0.0");
        assert!(result.is_ok());

        let result = validate_version_string("v0.0.0-20191109021931-daa7c04131f5");
        assert!(result.is_ok());
    }
}