deps_cargo/
registry.rs

1//! crates.io registry client.
2//!
3//! Provides access to crates.io via:
4//! - Sparse index protocol (<https://index.crates.io>) for version lookups
5//! - REST API (<https://crates.io/api/v1>) for search
6//!
7//! All HTTP requests are cached aggressively using ETag/Last-Modified headers.
8//!
9//! # Examples
10//!
11//! ```no_run
12//! use deps_cargo::CratesIoRegistry;
13//! use deps_core::HttpCache;
14//! use std::sync::Arc;
15//!
16//! #[tokio::main]
17//! async fn main() {
18//!     let cache = Arc::new(HttpCache::new());
19//!     let registry = CratesIoRegistry::new(cache);
20//!
21//!     let versions = registry.get_versions("serde").await.unwrap();
22//!     println!("Latest serde: {}", versions[0].num);
23//! }
24//! ```
25
26use crate::types::{CargoVersion, CrateInfo};
27use deps_core::{DepsError, HttpCache, Result};
28use semver::{Version, VersionReq};
29use serde::Deserialize;
30use std::any::Any;
31use std::collections::HashMap;
32use std::sync::Arc;
33
/// Root of the crates.io sparse index; index paths are appended after a `/`.
const SPARSE_INDEX_BASE: &str = "https://index.crates.io";

/// Root of the crates.io REST API, used here for crate search.
const SEARCH_API_BASE: &str = "https://crates.io/api/v1";

/// Prefix shared by every crate page on crates.io (no trailing slash).
pub const CRATES_IO_URL: &str = "https://crates.io/crates";

/// Builds the crates.io page URL for the crate called `name`.
///
/// The name is appended verbatim after [`CRATES_IO_URL`] and a `/`;
/// no validation or escaping is performed.
pub fn crate_url(name: &str) -> String {
    [CRATES_IO_URL, name].join("/")
}
44
45/// Client for interacting with crates.io registry.
46///
47/// Uses the sparse index protocol for fast version lookups and the REST API
48/// for package search. All requests are cached via the provided HttpCache.
49#[derive(Clone)]
50pub struct CratesIoRegistry {
51    cache: Arc<HttpCache>,
52}
53
54impl CratesIoRegistry {
55    /// Creates a new registry client with the given HTTP cache.
56    pub const fn new(cache: Arc<HttpCache>) -> Self {
57        Self { cache }
58    }
59
60    /// Fetches all versions for a crate from the sparse index.
61    ///
62    /// Returns versions sorted newest-first. Includes yanked versions.
63    ///
64    /// # Errors
65    ///
66    /// Returns an error if:
67    /// - HTTP request fails
68    /// - Response body is invalid UTF-8
69    /// - JSON parsing fails
70    ///
71    /// # Examples
72    ///
73    /// ```no_run
74    /// # use deps_cargo::CratesIoRegistry;
75    /// # use deps_core::HttpCache;
76    /// # use std::sync::Arc;
77    /// # #[tokio::main]
78    /// # async fn main() {
79    /// let cache = Arc::new(HttpCache::new());
80    /// let registry = CratesIoRegistry::new(cache);
81    ///
82    /// let versions = registry.get_versions("serde").await.unwrap();
83    /// assert!(!versions.is_empty());
84    /// # }
85    /// ```
86    pub async fn get_versions(&self, name: &str) -> Result<Vec<CargoVersion>> {
87        let path = sparse_index_path(name);
88        // Pre-allocate: SPARSE_INDEX_BASE (25 chars) + "/" + path
89        let mut url = String::with_capacity(SPARSE_INDEX_BASE.len() + 1 + path.len());
90        url.push_str(SPARSE_INDEX_BASE);
91        url.push('/');
92        url.push_str(&path);
93
94        let data = self.cache.get_cached(&url).await?;
95
96        parse_index_json(&data, name)
97    }
98
99    /// Finds the latest version matching the given semver requirement.
100    ///
101    /// Only returns non-yanked versions.
102    ///
103    /// # Errors
104    ///
105    /// Returns an error if:
106    /// - Version requirement string is invalid semver
107    /// - HTTP request fails
108    ///
109    /// # Examples
110    ///
111    /// ```no_run
112    /// # use deps_cargo::CratesIoRegistry;
113    /// # use deps_core::HttpCache;
114    /// # use std::sync::Arc;
115    /// # #[tokio::main]
116    /// # async fn main() {
117    /// let cache = Arc::new(HttpCache::new());
118    /// let registry = CratesIoRegistry::new(cache);
119    ///
120    /// let latest = registry.get_latest_matching("serde", "^1.0").await.unwrap();
121    /// assert!(latest.is_some());
122    /// # }
123    /// ```
124    pub async fn get_latest_matching(
125        &self,
126        name: &str,
127        req_str: &str,
128    ) -> Result<Option<CargoVersion>> {
129        let versions = self.get_versions(name).await?;
130
131        let req = req_str
132            .parse::<VersionReq>()
133            .map_err(|e| DepsError::InvalidVersionReq(e.to_string()))?;
134
135        Ok(versions.into_iter().find(|v| {
136            let version = v.num.parse::<Version>().ok();
137            version.is_some_and(|ver| req.matches(&ver) && !v.yanked)
138        }))
139    }
140
141    /// Searches for crates by name/keywords.
142    ///
143    /// Returns up to `limit` results sorted by relevance.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if:
148    /// - HTTP request fails
149    /// - JSON parsing fails
150    ///
151    /// # Examples
152    ///
153    /// ```no_run
154    /// # use deps_cargo::CratesIoRegistry;
155    /// # use deps_core::HttpCache;
156    /// # use std::sync::Arc;
157    /// # #[tokio::main]
158    /// # async fn main() {
159    /// let cache = Arc::new(HttpCache::new());
160    /// let registry = CratesIoRegistry::new(cache);
161    ///
162    /// let results = registry.search("serde", 10).await.unwrap();
163    /// assert!(!results.is_empty());
164    /// # }
165    /// ```
166    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<CrateInfo>> {
167        let url = format!(
168            "{}/crates?q={}&per_page={}",
169            SEARCH_API_BASE,
170            urlencoding::encode(query),
171            limit
172        );
173
174        let data = self.cache.get_cached(&url).await?;
175        parse_search_response(&data)
176    }
177}
178
/// Converts a crate name to its sparse index path.
///
/// The name is lowercased first. Layout, based on Cargo RFC 2789:
/// - 1 char: "1/{name}"
/// - 2 chars: "2/{name}"
/// - 3 chars: "3/{first_char}/{name}"
/// - 4+ chars: "{first_2}/{next_2}/{name}"
///
/// Lengths are counted in characters, not bytes, and prefixes are cut on
/// character boundaries, so this never panics: an empty name yields an empty
/// path, and non-ASCII names (which cannot exist on crates.io) simply produce
/// a path that will not resolve.
fn sparse_index_path(name: &str) -> String {
    let name_lower = name.to_lowercase();

    // Byte offset just past the first `n` characters, clamped to the end of
    // the string. Slicing at these offsets is always on a char boundary.
    let end_of = |n: usize| {
        name_lower
            .char_indices()
            .nth(n)
            .map_or(name_lower.len(), |(idx, _)| idx)
    };

    // Longest prefix for ASCII names is "xx/yy/" = 6 bytes.
    let mut path = String::with_capacity(name_lower.len() + 6);

    match name_lower.chars().count() {
        // Nothing sensible to build; callers get an empty path, not a panic.
        0 => return path,
        1 => path.push_str("1/"),
        2 => path.push_str("2/"),
        3 => {
            path.push_str("3/");
            path.push_str(&name_lower[..end_of(1)]);
            path.push('/');
        }
        _ => {
            let first = end_of(2);
            let second = end_of(4);
            path.push_str(&name_lower[..first]);
            path.push('/');
            path.push_str(&name_lower[first..second]);
            path.push('/');
        }
    }

    path.push_str(&name_lower);
    path
}
226
227/// Entry in the sparse index (one line of newline-delimited JSON).
228#[derive(Deserialize)]
229struct IndexEntry {
230    #[serde(rename = "vers")]
231    version: String,
232    #[serde(default)]
233    yanked: bool,
234    #[serde(default)]
235    features: HashMap<String, Vec<String>>,
236}
237
238/// Parses newline-delimited JSON from sparse index.
239fn parse_index_json(data: &[u8], _crate_name: &str) -> Result<Vec<CargoVersion>> {
240    let content = std::str::from_utf8(data)
241        .map_err(|e| DepsError::CacheError(format!("Invalid UTF-8: {e}")))?;
242
243    // Parse versions once and cache the parsed Version for sorting
244    let mut versions_with_parsed: Vec<(CargoVersion, Version)> = content
245        .lines()
246        .filter(|line| !line.trim().is_empty())
247        .filter_map(|line| {
248            let entry: IndexEntry = serde_json::from_str(line).ok()?;
249            let parsed = entry.version.parse::<Version>().ok()?;
250            Some((
251                CargoVersion {
252                    num: entry.version,
253                    yanked: entry.yanked,
254                    features: entry.features,
255                },
256                parsed,
257            ))
258        })
259        .collect();
260
261    // Sort using already-parsed versions (newest first)
262    versions_with_parsed.sort_unstable_by(|a, b| b.1.cmp(&a.1));
263
264    // Extract sorted versions
265    Ok(versions_with_parsed.into_iter().map(|(v, _)| v).collect())
266}
267
268/// Response from crates.io search API.
269#[derive(Deserialize)]
270struct SearchResponse {
271    crates: Vec<SearchCrate>,
272}
273
274/// Crate entry in search response.
275#[derive(Deserialize)]
276struct SearchCrate {
277    name: String,
278    #[serde(default)]
279    description: Option<String>,
280    #[serde(default)]
281    repository: Option<String>,
282    #[serde(default)]
283    documentation: Option<String>,
284    max_version: String,
285}
286
287/// Parses JSON response from crates.io search API.
288fn parse_search_response(data: &[u8]) -> Result<Vec<CrateInfo>> {
289    let response: SearchResponse = serde_json::from_slice(data)?;
290
291    Ok(response
292        .crates
293        .into_iter()
294        .map(|c| CrateInfo {
295            name: c.name,
296            description: c.description,
297            repository: c.repository,
298            documentation: c.documentation,
299            max_version: c.max_version,
300        })
301        .collect())
302}
303
304// Implement PackageRegistry trait for CratesIoRegistry
305#[async_trait::async_trait]
306impl deps_core::PackageRegistry for CratesIoRegistry {
307    type Version = CargoVersion;
308    type Metadata = CrateInfo;
309    type VersionReq = VersionReq;
310
311    async fn get_versions(&self, name: &str) -> Result<Vec<Self::Version>> {
312        self.get_versions(name).await
313    }
314
315    async fn get_latest_matching(
316        &self,
317        name: &str,
318        req: &Self::VersionReq,
319    ) -> Result<Option<Self::Version>> {
320        self.get_latest_matching(name, &req.to_string()).await
321    }
322
323    async fn search(&self, query: &str, limit: usize) -> Result<Vec<Self::Metadata>> {
324        self.search(query, limit).await
325    }
326}
327
328// Implement VersionInfo trait for CargoVersion
329impl deps_core::VersionInfo for CargoVersion {
330    fn version_string(&self) -> &str {
331        &self.num
332    }
333
334    fn is_yanked(&self) -> bool {
335        self.yanked
336    }
337
338    fn features(&self) -> Vec<String> {
339        self.features.keys().cloned().collect()
340    }
341}
342
343// Implement PackageMetadata trait for CrateInfo
344impl deps_core::PackageMetadata for CrateInfo {
345    fn name(&self) -> &str {
346        &self.name
347    }
348
349    fn description(&self) -> Option<&str> {
350        self.description.as_deref()
351    }
352
353    fn repository(&self) -> Option<&str> {
354        self.repository.as_deref()
355    }
356
357    fn documentation(&self) -> Option<&str> {
358        self.documentation.as_deref()
359    }
360
361    fn latest_version(&self) -> &str {
362        &self.max_version
363    }
364}
365
366// Implement new Registry trait for trait object support
367#[async_trait::async_trait]
368impl deps_core::Registry for CratesIoRegistry {
369    async fn get_versions(&self, name: &str) -> Result<Vec<Box<dyn deps_core::Version>>> {
370        let versions = self.get_versions(name).await?;
371        Ok(versions
372            .into_iter()
373            .map(|v| Box::new(v) as Box<dyn deps_core::Version>)
374            .collect())
375    }
376
377    async fn get_latest_matching(
378        &self,
379        name: &str,
380        req: &str,
381    ) -> Result<Option<Box<dyn deps_core::Version>>> {
382        let version = self.get_latest_matching(name, req).await?;
383        Ok(version.map(|v| Box::new(v) as Box<dyn deps_core::Version>))
384    }
385
386    async fn search(&self, query: &str, limit: usize) -> Result<Vec<Box<dyn deps_core::Metadata>>> {
387        let results = self.search(query, limit).await?;
388        Ok(results
389            .into_iter()
390            .map(|m| Box::new(m) as Box<dyn deps_core::Metadata>)
391            .collect())
392    }
393
394    fn package_url(&self, name: &str) -> String {
395        crate_url(name)
396    }
397
398    fn as_any(&self) -> &dyn Any {
399        self
400    }
401}
402
// Unit tests for path construction and response parsing. Tests that hit the
// real crates.io services are marked `#[ignore]` and must be run explicitly.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sparse_index_path() {
        assert_eq!(sparse_index_path("a"), "1/a");
        assert_eq!(sparse_index_path("ab"), "2/ab");
        assert_eq!(sparse_index_path("abc"), "3/a/abc");
        assert_eq!(sparse_index_path("serde"), "se/rd/serde");
        assert_eq!(sparse_index_path("tokio"), "to/ki/tokio");
    }

    #[test]
    fn test_sparse_index_path_uppercase() {
        assert_eq!(sparse_index_path("SERDE"), "se/rd/serde");
    }

    #[test]
    fn test_parse_index_json() {
        let json = r#"{"name":"serde","vers":"1.0.0","yanked":false,"features":{},"deps":[]}
{"name":"serde","vers":"1.0.1","yanked":false,"features":{"derive":["serde_derive"]},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "serde").unwrap();
        assert_eq!(versions.len(), 2);
        // Newest first, regardless of input order.
        assert_eq!(versions[0].num, "1.0.1");
        assert_eq!(versions[1].num, "1.0.0");
        assert!(!versions[0].yanked);
    }

    #[test]
    fn test_parse_index_json_with_yanked() {
        let json = r#"{"name":"test","vers":"0.1.0","yanked":true,"features":{},"deps":[]}
{"name":"test","vers":"0.2.0","yanked":false,"features":{},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 2);
        // Yanked versions are kept; 0.1.0 sorts last.
        assert!(versions[1].yanked);
        assert!(!versions[0].yanked);
    }

    #[test]
    fn test_parse_search_response() {
        let json = r#"{
            "crates": [
                {
                    "name": "serde",
                    "description": "A serialization framework",
                    "repository": "https://github.com/serde-rs/serde",
                    "documentation": "https://docs.rs/serde",
                    "max_version": "1.0.214"
                }
            ]
        }"#;

        let results = parse_search_response(json.as_bytes()).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "serde");
        assert_eq!(results[0].max_version, "1.0.214");
    }

    #[tokio::test]
    #[ignore]
    async fn test_fetch_real_serde_versions() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let versions = registry.get_versions("serde").await.unwrap();

        assert!(!versions.is_empty());
        assert!(versions.iter().any(|v| v.num.starts_with("1.")));
    }

    #[tokio::test]
    #[ignore]
    async fn test_search_real() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let results = registry.search("serde", 5).await.unwrap();

        assert!(!results.is_empty());
        assert!(results.iter().any(|r| r.name == "serde"));
    }

    #[tokio::test]
    #[ignore]
    async fn test_get_latest_matching_real() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let latest = registry.get_latest_matching("serde", "^1.0").await.unwrap();

        assert!(latest.is_some());
        let version = latest.unwrap();
        assert!(version.num.starts_with("1."));
        assert!(!version.yanked);
    }

    #[test]
    fn test_parse_index_json_empty() {
        let json = "";
        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_index_json_blank_lines() {
        let json = "\n\n\n";
        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_index_json_invalid_version() {
        // Entries whose version is not valid semver are dropped, not errors.
        let json = r#"{"name":"test","vers":"invalid","yanked":false,"features":{},"deps":[]}"#;
        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_index_json_mixed_valid_invalid() {
        let json = r#"{"name":"test","vers":"1.0.0","yanked":false,"features":{},"deps":[]}
{"name":"test","vers":"invalid","yanked":false,"features":{},"deps":[]}
{"name":"test","vers":"2.0.0","yanked":false,"features":{},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 2);
        assert_eq!(versions[0].num, "2.0.0");
        assert_eq!(versions[1].num, "1.0.0");
    }

    #[test]
    fn test_parse_index_json_with_features() {
        let json = r#"{"name":"test","vers":"1.0.0","yanked":false,"features":{"default":["std"],"std":[]},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 1);
        assert_eq!(versions[0].features.len(), 2);
        assert!(versions[0].features.contains_key("default"));
        assert!(versions[0].features.contains_key("std"));
    }

    #[test]
    fn test_parse_search_response_empty() {
        let json = r#"{"crates": []}"#;
        let results = parse_search_response(json.as_bytes()).unwrap();
        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_parse_search_response_missing_optional_fields() {
        let json = r#"{
            "crates": [
                {
                    "name": "minimal",
                    "max_version": "1.0.0"
                }
            ]
        }"#;

        let results = parse_search_response(json.as_bytes()).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "minimal");
        assert_eq!(results[0].description, None);
        assert_eq!(results[0].repository, None);
    }

    #[test]
    fn test_sparse_index_path_single_char() {
        assert_eq!(sparse_index_path("x"), "1/x");
        assert_eq!(sparse_index_path("z"), "1/z");
    }

    #[test]
    fn test_sparse_index_path_two_chars() {
        assert_eq!(sparse_index_path("xy"), "2/xy");
        assert_eq!(sparse_index_path("ab"), "2/ab");
    }

    #[test]
    fn test_sparse_index_path_three_chars() {
        assert_eq!(sparse_index_path("xyz"), "3/x/xyz");
        assert_eq!(sparse_index_path("foo"), "3/f/foo");
    }

    #[test]
    fn test_sparse_index_path_long_name() {
        assert_eq!(
            sparse_index_path("very-long-crate-name"),
            "ve/ry/very-long-crate-name"
        );
    }

    #[test]
    fn test_sparse_index_path_numbers() {
        assert_eq!(sparse_index_path("1234"), "12/34/1234");
    }

    #[test]
    fn test_sparse_index_path_mixed_case() {
        assert_eq!(sparse_index_path("MyPackage"), "my/pa/mypackage");
        assert_eq!(sparse_index_path("UPPERCASE"), "up/pe/uppercase");
    }

    #[test]
    fn test_crate_url() {
        assert_eq!(crate_url("serde"), "https://crates.io/crates/serde");
        assert_eq!(crate_url("tokio"), "https://crates.io/crates/tokio");
    }

    #[test]
    fn test_crate_url_with_hyphens() {
        assert_eq!(
            crate_url("serde-json"),
            "https://crates.io/crates/serde-json"
        );
    }

    #[tokio::test]
    async fn test_registry_creation() {
        let cache = Arc::new(HttpCache::new());
        let _registry = CratesIoRegistry::new(cache);
    }

    #[tokio::test]
    async fn test_registry_clone() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        // Actually exercise `Clone` (a plain move would not), and check that
        // the clone shares the same underlying cache rather than copying it.
        let cloned = registry.clone();
        assert!(Arc::ptr_eq(&registry.cache, &cloned.cache));
    }
}