Skip to main content

deps_cargo/
registry.rs

1//! crates.io registry client.
2//!
3//! Provides access to crates.io via:
4//! - Sparse index protocol (<https://index.crates.io>) for version lookups
5//! - REST API (<https://crates.io/api/v1>) for search
6//!
7//! All HTTP requests are cached aggressively using ETag/Last-Modified headers.
8//!
9//! # Examples
10//!
11//! ```no_run
12//! use deps_cargo::CratesIoRegistry;
13//! use deps_core::HttpCache;
14//! use std::sync::Arc;
15//!
16//! #[tokio::main]
17//! async fn main() {
18//!     let cache = Arc::new(HttpCache::new());
19//!     let registry = CratesIoRegistry::new(cache);
20//!
21//!     let versions = registry.get_versions("serde").await.unwrap();
22//!     println!("Latest serde: {}", versions[0].num);
23//! }
24//! ```
25
26use crate::types::{CargoVersion, CrateInfo};
27use deps_core::{DepsError, HttpCache, Result};
28use semver::{Version, VersionReq};
29use serde::Deserialize;
30use std::any::Any;
31use std::collections::HashMap;
32use std::sync::Arc;
33
/// Root of the crates.io sparse index (per-crate version listings).
const SPARSE_INDEX_BASE: &str = "https://index.crates.io";
/// Root of the crates.io REST API (used for search).
const SEARCH_API_BASE: &str = "https://crates.io/api/v1";

/// Prefix shared by every crate page on crates.io.
pub const CRATES_IO_URL: &str = "https://crates.io/crates";

/// Builds the crates.io page URL for the crate called `name`.
///
/// The name is appended verbatim after [`CRATES_IO_URL`] and a `/`.
pub fn crate_url(name: &str) -> String {
    [CRATES_IO_URL, name].join("/")
}
44
45/// Client for interacting with crates.io registry.
46///
47/// Uses the sparse index protocol for fast version lookups and the REST API
48/// for package search. All requests are cached via the provided HttpCache.
49#[derive(Clone)]
50pub struct CratesIoRegistry {
51    cache: Arc<HttpCache>,
52}
53
54impl CratesIoRegistry {
55    /// Creates a new registry client with the given HTTP cache.
56    pub const fn new(cache: Arc<HttpCache>) -> Self {
57        Self { cache }
58    }
59
60    /// Fetches all versions for a crate from the sparse index.
61    ///
62    /// Returns versions sorted newest-first. Includes yanked versions.
63    ///
64    /// # Errors
65    ///
66    /// Returns an error if:
67    /// - HTTP request fails
68    /// - Response body is invalid UTF-8
69    /// - JSON parsing fails
70    ///
71    /// # Examples
72    ///
73    /// ```no_run
74    /// # use deps_cargo::CratesIoRegistry;
75    /// # use deps_core::HttpCache;
76    /// # use std::sync::Arc;
77    /// # #[tokio::main]
78    /// # async fn main() {
79    /// let cache = Arc::new(HttpCache::new());
80    /// let registry = CratesIoRegistry::new(cache);
81    ///
82    /// let versions = registry.get_versions("serde").await.unwrap();
83    /// assert!(!versions.is_empty());
84    /// # }
85    /// ```
86    pub async fn get_versions(&self, name: &str) -> Result<Vec<CargoVersion>> {
87        let path = sparse_index_path(name);
88        // Pre-allocate: SPARSE_INDEX_BASE (25 chars) + "/" + path
89        let mut url = String::with_capacity(SPARSE_INDEX_BASE.len() + 1 + path.len());
90        url.push_str(SPARSE_INDEX_BASE);
91        url.push('/');
92        url.push_str(&path);
93
94        let data = self.cache.get_cached(&url).await?;
95
96        parse_index_json(&data, name)
97    }
98
99    /// Finds the latest version matching the given semver requirement.
100    ///
101    /// Only returns non-yanked versions.
102    ///
103    /// # Errors
104    ///
105    /// Returns an error if:
106    /// - Version requirement string is invalid semver
107    /// - HTTP request fails
108    ///
109    /// # Examples
110    ///
111    /// ```no_run
112    /// # use deps_cargo::CratesIoRegistry;
113    /// # use deps_core::HttpCache;
114    /// # use std::sync::Arc;
115    /// # #[tokio::main]
116    /// # async fn main() {
117    /// let cache = Arc::new(HttpCache::new());
118    /// let registry = CratesIoRegistry::new(cache);
119    ///
120    /// let latest = registry.get_latest_matching("serde", "^1.0").await.unwrap();
121    /// assert!(latest.is_some());
122    /// # }
123    /// ```
124    pub async fn get_latest_matching(
125        &self,
126        name: &str,
127        req_str: &str,
128    ) -> Result<Option<CargoVersion>> {
129        let versions = self.get_versions(name).await?;
130
131        let req = req_str
132            .parse::<VersionReq>()
133            .map_err(|e| DepsError::InvalidVersionReq(e.to_string()))?;
134
135        Ok(versions.into_iter().find(|v| {
136            let version = v.num.parse::<Version>().ok();
137            version.is_some_and(|ver| req.matches(&ver) && !v.yanked)
138        }))
139    }
140
141    /// Searches for crates by name/keywords.
142    ///
143    /// Returns up to `limit` results sorted by relevance.
144    ///
145    /// # Errors
146    ///
147    /// Returns an error if:
148    /// - HTTP request fails
149    /// - JSON parsing fails
150    ///
151    /// # Examples
152    ///
153    /// ```no_run
154    /// # use deps_cargo::CratesIoRegistry;
155    /// # use deps_core::HttpCache;
156    /// # use std::sync::Arc;
157    /// # #[tokio::main]
158    /// # async fn main() {
159    /// let cache = Arc::new(HttpCache::new());
160    /// let registry = CratesIoRegistry::new(cache);
161    ///
162    /// let results = registry.search("serde", 10).await.unwrap();
163    /// assert!(!results.is_empty());
164    /// # }
165    /// ```
166    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<CrateInfo>> {
167        let url = format!(
168            "{}/crates?q={}&per_page={}&sort=downloads",
169            SEARCH_API_BASE,
170            urlencoding::encode(query),
171            limit
172        );
173
174        let data = self.cache.get_cached(&url).await?;
175        parse_search_response(&data)
176    }
177}
178
/// Converts a crate name to its sparse index path.
///
/// The name is lowercased first, then laid out according to the Cargo
/// registry index scheme:
/// - 1 char: "1/{name}"
/// - 2 chars: "2/{name}"
/// - 3 chars: "3/{first_char}/{name}"
/// - 4+ chars: "{first_2}/{next_2}/{name}"
///
/// Lengths and prefixes are measured in characters and cut on character
/// boundaries, so a name containing multi-byte UTF-8 never causes a slicing
/// panic. For ASCII names this is identical to byte-based slicing.
///
/// An empty name yields an empty path instead of panicking; the caller's
/// request for it then fails through the normal error path.
fn sparse_index_path(name: &str) -> String {
    let name_lower = name.to_lowercase();

    // Byte offset at which the `n`-th character starts, or the end of the
    // string when the name has fewer than `n + 1` characters.
    let offset = |n: usize| {
        name_lower
            .char_indices()
            .nth(n)
            .map_or(name_lower.len(), |(idx, _)| idx)
    };

    // Longest prefix is "xx/yy/" (6 ASCII bytes) followed by the name itself.
    let mut path = String::with_capacity(name_lower.len() + 6);

    match name_lower.chars().count() {
        0 => return path,
        1 => path.push_str("1/"),
        2 => path.push_str("2/"),
        3 => {
            path.push_str("3/");
            path.push_str(&name_lower[..offset(1)]);
            path.push('/');
        }
        _ => {
            let (mid, end) = (offset(2), offset(4));
            path.push_str(&name_lower[..mid]);
            path.push('/');
            path.push_str(&name_lower[mid..end]);
            path.push('/');
        }
    }

    path.push_str(&name_lower);
    path
}
226
227/// Entry in the sparse index (one line of newline-delimited JSON).
228#[derive(Deserialize)]
229struct IndexEntry {
230    #[serde(rename = "vers")]
231    version: String,
232    #[serde(default)]
233    yanked: bool,
234    #[serde(default)]
235    features: HashMap<String, Vec<String>>,
236}
237
238/// Parses newline-delimited JSON from sparse index.
239fn parse_index_json(data: &[u8], _crate_name: &str) -> Result<Vec<CargoVersion>> {
240    let content = std::str::from_utf8(data)
241        .map_err(|e| DepsError::CacheError(format!("Invalid UTF-8: {e}")))?;
242
243    // Parse versions once and cache the parsed Version for sorting
244    let mut versions_with_parsed: Vec<(CargoVersion, Version)> = content
245        .lines()
246        .filter(|line| !line.trim().is_empty())
247        .filter_map(|line| {
248            let entry: IndexEntry = serde_json::from_str(line).ok()?;
249            let parsed = entry.version.parse::<Version>().ok()?;
250            Some((
251                CargoVersion {
252                    num: entry.version,
253                    yanked: entry.yanked,
254                    features: entry.features,
255                },
256                parsed,
257            ))
258        })
259        .collect();
260
261    // Sort using already-parsed versions (newest first)
262    versions_with_parsed.sort_unstable_by(|a, b| b.1.cmp(&a.1));
263
264    // Extract sorted versions
265    Ok(versions_with_parsed.into_iter().map(|(v, _)| v).collect())
266}
267
268/// Response from crates.io search API.
269#[derive(Deserialize)]
270struct SearchResponse {
271    crates: Vec<SearchCrate>,
272}
273
274/// Crate entry in search response.
275#[derive(Deserialize)]
276struct SearchCrate {
277    name: String,
278    #[serde(default)]
279    description: Option<String>,
280    #[serde(default)]
281    repository: Option<String>,
282    #[serde(default)]
283    documentation: Option<String>,
284    max_version: String,
285}
286
287/// Parses JSON response from crates.io search API.
288fn parse_search_response(data: &[u8]) -> Result<Vec<CrateInfo>> {
289    let response: SearchResponse = serde_json::from_slice(data)?;
290
291    Ok(response
292        .crates
293        .into_iter()
294        .map(|c| CrateInfo {
295            name: c.name,
296            description: c.description,
297            repository: c.repository,
298            documentation: c.documentation,
299            max_version: c.max_version,
300        })
301        .collect())
302}
303
304impl deps_core::Registry for CratesIoRegistry {
305    fn get_versions<'a>(
306        &'a self,
307        name: &'a str,
308    ) -> deps_core::ecosystem::BoxFuture<'a, Result<Vec<Box<dyn deps_core::Version>>>> {
309        Box::pin(async move {
310            let versions = self.get_versions(name).await?;
311            Ok(versions
312                .into_iter()
313                .map(|v| Box::new(v) as Box<dyn deps_core::Version>)
314                .collect())
315        })
316    }
317
318    fn get_latest_matching<'a>(
319        &'a self,
320        name: &'a str,
321        req: &'a str,
322    ) -> deps_core::ecosystem::BoxFuture<'a, Result<Option<Box<dyn deps_core::Version>>>> {
323        Box::pin(async move {
324            let version = self.get_latest_matching(name, req).await?;
325            Ok(version.map(|v| Box::new(v) as Box<dyn deps_core::Version>))
326        })
327    }
328
329    fn search<'a>(
330        &'a self,
331        query: &'a str,
332        limit: usize,
333    ) -> deps_core::ecosystem::BoxFuture<'a, Result<Vec<Box<dyn deps_core::Metadata>>>> {
334        Box::pin(async move {
335            let results = self.search(query, limit).await?;
336            Ok(results
337                .into_iter()
338                .map(|m| Box::new(m) as Box<dyn deps_core::Metadata>)
339                .collect())
340        })
341    }
342
343    fn package_url(&self, name: &str) -> String {
344        crate_url(name)
345    }
346
347    fn as_any(&self) -> &dyn Any {
348        self
349    }
350}
351
/// Unit tests for path construction, response parsing and client construction.
///
/// Tests marked `#[ignore]` talk to the real crates.io services and must be
/// requested explicitly.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sparse_index_path() {
        assert_eq!(sparse_index_path("a"), "1/a");
        assert_eq!(sparse_index_path("ab"), "2/ab");
        assert_eq!(sparse_index_path("abc"), "3/a/abc");
        assert_eq!(sparse_index_path("serde"), "se/rd/serde");
        assert_eq!(sparse_index_path("tokio"), "to/ki/tokio");
    }

    #[test]
    fn test_sparse_index_path_uppercase() {
        assert_eq!(sparse_index_path("SERDE"), "se/rd/serde");
    }

    #[test]
    fn test_parse_index_json() {
        let json = r#"{"name":"serde","vers":"1.0.0","yanked":false,"features":{},"deps":[]}
{"name":"serde","vers":"1.0.1","yanked":false,"features":{"derive":["serde_derive"]},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "serde").unwrap();
        assert_eq!(versions.len(), 2);
        // Newest first, regardless of input order.
        assert_eq!(versions[0].num, "1.0.1");
        assert_eq!(versions[1].num, "1.0.0");
        assert!(!versions[0].yanked);
    }

    #[test]
    fn test_parse_index_json_with_yanked() {
        let json = r#"{"name":"test","vers":"0.1.0","yanked":true,"features":{},"deps":[]}
{"name":"test","vers":"0.2.0","yanked":false,"features":{},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 2);
        // After sorting, 0.2.0 is at index 0 and the yanked 0.1.0 at index 1.
        assert!(versions[1].yanked);
        assert!(!versions[0].yanked);
    }

    #[test]
    fn test_parse_search_response() {
        let json = r#"{
            "crates": [
                {
                    "name": "serde",
                    "description": "A serialization framework",
                    "repository": "https://github.com/serde-rs/serde",
                    "documentation": "https://docs.rs/serde",
                    "max_version": "1.0.214"
                }
            ]
        }"#;

        let results = parse_search_response(json.as_bytes()).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "serde");
        assert_eq!(results[0].max_version, "1.0.214");
    }

    #[tokio::test]
    #[ignore]
    async fn test_fetch_real_serde_versions() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let versions = registry.get_versions("serde").await.unwrap();

        assert!(!versions.is_empty());
        assert!(versions.iter().any(|v| v.num.starts_with("1.")));
    }

    #[tokio::test]
    #[ignore]
    async fn test_search_real() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let results = registry.search("serde", 5).await.unwrap();

        assert!(!results.is_empty());
        assert!(results.iter().any(|r| r.name == "serde"));
    }

    #[tokio::test]
    #[ignore]
    async fn test_get_latest_matching_real() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let latest = registry.get_latest_matching("serde", "^1.0").await.unwrap();

        assert!(latest.is_some());
        let version = latest.unwrap();
        assert!(version.num.starts_with("1."));
        assert!(!version.yanked);
    }

    #[test]
    fn test_parse_index_json_empty() {
        let json = "";
        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_index_json_blank_lines() {
        let json = "\n\n\n";
        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_index_json_invalid_version() {
        let json = r#"{"name":"test","vers":"invalid","yanked":false,"features":{},"deps":[]}"#;
        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 0);
    }

    #[test]
    fn test_parse_index_json_mixed_valid_invalid() {
        let json = r#"{"name":"test","vers":"1.0.0","yanked":false,"features":{},"deps":[]}
{"name":"test","vers":"invalid","yanked":false,"features":{},"deps":[]}
{"name":"test","vers":"2.0.0","yanked":false,"features":{},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        // The entry with the unparsable version is skipped, not reported.
        assert_eq!(versions.len(), 2);
        assert_eq!(versions[0].num, "2.0.0");
        assert_eq!(versions[1].num, "1.0.0");
    }

    #[test]
    fn test_parse_index_json_with_features() {
        let json = r#"{"name":"test","vers":"1.0.0","yanked":false,"features":{"default":["std"],"std":[]},"deps":[]}"#;

        let versions = parse_index_json(json.as_bytes(), "test").unwrap();
        assert_eq!(versions.len(), 1);
        assert_eq!(versions[0].features.len(), 2);
        assert!(versions[0].features.contains_key("default"));
        assert!(versions[0].features.contains_key("std"));
    }

    #[test]
    fn test_parse_search_response_empty() {
        let json = r#"{"crates": []}"#;
        let results = parse_search_response(json.as_bytes()).unwrap();
        assert_eq!(results.len(), 0);
    }

    #[test]
    fn test_parse_search_response_missing_optional_fields() {
        let json = r#"{
            "crates": [
                {
                    "name": "minimal",
                    "max_version": "1.0.0"
                }
            ]
        }"#;

        let results = parse_search_response(json.as_bytes()).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "minimal");
        assert_eq!(results[0].description, None);
        assert_eq!(results[0].repository, None);
        assert_eq!(results[0].documentation, None);
    }

    #[test]
    fn test_sparse_index_path_single_char() {
        assert_eq!(sparse_index_path("x"), "1/x");
        assert_eq!(sparse_index_path("z"), "1/z");
    }

    #[test]
    fn test_sparse_index_path_two_chars() {
        assert_eq!(sparse_index_path("xy"), "2/xy");
        assert_eq!(sparse_index_path("ab"), "2/ab");
    }

    #[test]
    fn test_sparse_index_path_three_chars() {
        assert_eq!(sparse_index_path("xyz"), "3/x/xyz");
        assert_eq!(sparse_index_path("foo"), "3/f/foo");
    }

    #[test]
    fn test_sparse_index_path_long_name() {
        assert_eq!(
            sparse_index_path("very-long-crate-name"),
            "ve/ry/very-long-crate-name"
        );
    }

    #[test]
    fn test_sparse_index_path_numbers() {
        assert_eq!(sparse_index_path("1234"), "12/34/1234");
    }

    #[test]
    fn test_sparse_index_path_mixed_case() {
        assert_eq!(sparse_index_path("MyPackage"), "my/pa/mypackage");
        assert_eq!(sparse_index_path("UPPERCASE"), "up/pe/uppercase");
    }

    #[test]
    fn test_crate_url() {
        assert_eq!(crate_url("serde"), "https://crates.io/crates/serde");
        assert_eq!(crate_url("tokio"), "https://crates.io/crates/tokio");
    }

    #[test]
    fn test_crate_url_with_hyphens() {
        assert_eq!(
            crate_url("serde-json"),
            "https://crates.io/crates/serde-json"
        );
    }

    #[tokio::test]
    async fn test_registry_creation() {
        let cache = Arc::new(HttpCache::new());
        let _registry = CratesIoRegistry::new(cache);
    }

    #[tokio::test]
    async fn test_registry_clone() {
        let cache = Arc::new(HttpCache::new());
        let registry = CratesIoRegistry::new(cache);
        let cloned = registry.clone();

        // A clone must share the original's cache rather than own a separate one.
        assert!(Arc::ptr_eq(&registry.cache, &cloned.cache));
    }
}