deps_lsp/document/
loader.rs

1//! Document loading from filesystem for cold start scenarios.
2//!
3//! When an LSP client has a file already open and the server starts,
4//! the client may not send a didOpen event. This module provides
5//! infrastructure to load documents from disk when handlers receive
6//! requests for unknown documents.
7//!
8//! # Architecture
9//!
10//! Cold start loading is pull-based (not workspace scanning):
11//! - Handlers check if document exists in state
12//! - If not, call `ensure_document_loaded()`
13//! - Document is loaded from disk, parsed, and cached
14//! - Background task fetches version information
15//!
16//! # Performance
17//!
18//! File reading is async and non-blocking. Typical latency is <50ms
19//! for documents under 100KB (most manifest files are <10KB).
20//!
21//! # Security
22//!
//! - Rate limiting prevents DoS attacks (10 req/sec per URI)
//! - File size limit: 10MB (hard limit, see `MAX_FILE_SIZE`)
25//! - Non-UTF8 files are rejected
26//!
27//! # Error Handling
28//!
29//! All errors are logged and result in graceful degradation (handlers
30//! return empty results rather than crashing).
31
32use deps_core::error::{DepsError, Result};
33use tower_lsp_server::ls_types::Uri;
34
/// Hard upper bound, in bytes, on the size of a file that may be loaded (10MB).
///
/// Anything bigger is refused outright so that a single oversized file cannot
/// cause excessive memory usage or degrade performance. Manifest files are
/// typically <100KB, so this bound leaves plenty of headroom.
const MAX_FILE_SIZE: u64 = 10 * 1_000_000; // 10MB (decimal megabytes)
41
/// Size, in bytes, above which a file is considered unusually large (1MB).
///
/// Crossing this threshold only triggers a logged warning; typical manifests
/// are far smaller than this.
const LARGE_FILE_THRESHOLD: u64 = 1_000 * 1_000; // 1MB (decimal megabytes)
46
/// Loads document content from disk.
///
/// The URI is converted to a filesystem path, the path's metadata is checked
/// (regular file, size limits), and the content is then read asynchronously
/// as UTF-8 text.
///
/// # Arguments
///
/// * `uri` - Document URI (must be file:// scheme)
///
/// # Returns
///
/// * `Ok(String)` - File content
/// * `Err(DepsError)` - The document could not be loaded (see `Errors`)
///
/// # Errors
///
/// - `DepsError::InvalidUri` - URI cannot be converted to a filesystem path
/// - `DepsError::CacheError` - File is larger than `MAX_FILE_SIZE`
/// - `DepsError::Io` - Metadata lookup or read failed (not found, permission
///   denied, content is not valid UTF-8, etc.), or the path does not refer to
///   a regular file
///
/// # Examples
///
/// ```no_run
/// use deps_lsp::document::load_document_from_disk;
/// use tower_lsp_server::ls_types::Uri;
///
/// # async fn example() -> deps_core::error::Result<()> {
/// let uri = Uri::from_file_path("/path/to/Cargo.toml").unwrap();
/// let content = load_document_from_disk(&uri).await?;
/// println!("Loaded {} bytes", content.len());
/// # Ok(())
/// # }
/// ```
pub async fn load_document_from_disk(uri: &Uri) -> Result<String> {
    // Convert URI to filesystem path
    let path = match uri.to_file_path() {
        Some(p) => p,
        None => {
            tracing::debug!("Cannot load non-file URI: {:?}", uri);
            return Err(DepsError::InvalidUri(format!("{uri:?}")));
        }
    };

    tracing::debug!("Loading document from disk: {:?}", path);

    // Fetch metadata up front so the file can be vetted before any content is read
    let metadata = match tokio::fs::metadata(&path).await {
        Ok(metadata) => metadata,
        Err(e) => {
            // Differentiate permission errors from other IO errors
            match e.kind() {
                std::io::ErrorKind::NotFound => {
                    tracing::debug!("File not found: {:?}", path);
                }
                std::io::ErrorKind::PermissionDenied => {
                    tracing::warn!("Permission denied: {:?}", path);
                }
                _ => {
                    tracing::error!("IO error reading metadata for {:?}: {}", path, e);
                }
            }
            return Err(DepsError::Io(e));
        }
    };

    // The reported length is only meaningful for regular files. Special files
    // (FIFOs, device nodes) can report a length of 0 while yielding unbounded
    // data, which would bypass the size limit below, so they are refused here.
    if !metadata.is_file() {
        tracing::warn!("Refusing to load non-regular file: {:?}", path);
        return Err(DepsError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!("not a regular file: {path:?}"),
        )));
    }

    let size = metadata.len();

    // Hard limit: reject files over 10MB
    if size > MAX_FILE_SIZE {
        tracing::error!(
            "Document exceeds maximum size: {} bytes (limit: {} bytes)",
            size,
            MAX_FILE_SIZE
        );
        return Err(DepsError::CacheError(format!(
            "file too large: {size} bytes (max: {MAX_FILE_SIZE} bytes)"
        )));
    }

    // Warning for files over 1MB
    if size > LARGE_FILE_THRESHOLD {
        tracing::warn!(
            "Document is large: {} bytes for {:?}. Typical manifests are <100KB.",
            size,
            path
        );
    }

    tracing::trace!("File size: {} bytes", size);

    // Read file content asynchronously.
    // NOTE: the size check above is based on a metadata snapshot; the file can
    // still change between that snapshot and this read.
    let content = tokio::fs::read_to_string(&path).await.map_err(|e| {
        // Differentiate permission errors in file read
        match e.kind() {
            std::io::ErrorKind::NotFound => {
                tracing::debug!("File not found during read: {:?}", path);
            }
            std::io::ErrorKind::PermissionDenied => {
                tracing::warn!("Permission denied reading file: {:?}", path);
            }
            _ => {
                tracing::error!("IO error reading file {:?}: {}", path, e);
            }
        }
        DepsError::Io(e)
    })?;

    tracing::debug!(
        "Successfully loaded document: {:?} ({} bytes)",
        path,
        content.len()
    );

    Ok(content)
}
158
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::{NamedTempFile, TempDir};
    use tower_lsp_server::ls_types::Uri;

    /// Writes `bytes` to a fresh temporary file and returns its handle.
    ///
    /// The file is removed when the handle is dropped, so callers must keep
    /// the handle alive for as long as the path is in use.
    fn temp_file_with(bytes: &[u8]) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(bytes).unwrap();
        file.flush().unwrap();
        file
    }

    /// Creates `name` inside `dir` with a length of exactly `len` bytes.
    ///
    /// `File::set_len` extends the file with zero bytes, which are valid
    /// UTF-8, so the result can be loaded as text without writing `len` bytes
    /// by hand.
    fn zero_filled_file(dir: &TempDir, name: &str, len: u64) -> std::path::PathBuf {
        let path = dir.path().join(name);
        let file = std::fs::File::create(&path).unwrap();
        file.set_len(len).unwrap();
        path
    }

    #[tokio::test]
    async fn test_load_existing_file() {
        let content = "test content";
        let temp_file = temp_file_with(content.as_bytes());

        let uri = Uri::from_file_path(temp_file.path()).unwrap();
        let loaded = load_document_from_disk(&uri).await.unwrap();

        assert_eq!(loaded, content);
    }

    #[tokio::test]
    async fn test_load_nonexistent_file() {
        let uri = Uri::from_file_path("/nonexistent/file/path.toml").unwrap();
        let result = load_document_from_disk(&uri).await;

        match result {
            Err(DepsError::Io(e)) => assert_eq!(e.kind(), std::io::ErrorKind::NotFound),
            other => panic!("Expected Io error, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_load_empty_file() {
        let temp_file = NamedTempFile::new().unwrap();
        // File is empty, don't write anything

        let uri = Uri::from_file_path(temp_file.path()).unwrap();
        let loaded = load_document_from_disk(&uri).await.unwrap();

        assert_eq!(loaded, "");
    }

    // Note: Tests for non-file URIs (http://, untitled:) are covered by integration tests
    // Creating non-file URIs in unit tests would require adding fluent_uri as a dev dependency
    // The implementation correctly handles these cases via to_file_path() returning None

    #[tokio::test]
    async fn test_load_utf8_file() {
        let content = "Hello 世界 🌍 Привет";
        let temp_file = temp_file_with(content.as_bytes());

        let uri = Uri::from_file_path(temp_file.path()).unwrap();
        let loaded = load_document_from_disk(&uri).await.unwrap();

        assert_eq!(loaded, content);
    }

    #[tokio::test]
    async fn test_load_non_utf8_file() {
        // Invalid UTF-8 bytes
        let temp_file = temp_file_with(&[0xFF, 0xFE, 0xFD]);

        let uri = Uri::from_file_path(temp_file.path()).unwrap();
        let result = load_document_from_disk(&uri).await;

        match result {
            Err(DepsError::Io(e)) => assert_eq!(e.kind(), std::io::ErrorKind::InvalidData),
            other => panic!("Expected Io error for non-UTF8 content, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_load_directory_is_rejected() {
        let temp_dir = TempDir::new().unwrap();

        let uri = Uri::from_file_path(temp_dir.path()).unwrap();
        let result = load_document_from_disk(&uri).await;

        assert!(
            matches!(result, Err(DepsError::Io(_))),
            "Expected Io error for a directory, got {result:?}"
        );
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_load_permission_denied() {
        use std::fs;
        use std::os::unix::fs::PermissionsExt;

        let temp_file = temp_file_with(b"test");

        // Remove read permissions
        let mut perms = fs::metadata(temp_file.path()).unwrap().permissions();
        perms.set_mode(0o000);
        fs::set_permissions(temp_file.path(), perms.clone()).unwrap();

        // Privileged users (e.g. root in CI containers) bypass mode bits, in
        // which case the scenario under test cannot be reproduced.
        let permissions_enforced = fs::File::open(temp_file.path()).is_err();

        let uri = Uri::from_file_path(temp_file.path()).unwrap();
        let result = load_document_from_disk(&uri).await;

        // Restore permissions for cleanup
        perms.set_mode(0o644);
        let _ = fs::set_permissions(temp_file.path(), perms);

        if !permissions_enforced {
            eprintln!("skipping permission-denied assertions: file permissions are not enforced");
            return;
        }

        match result {
            Err(DepsError::Io(e)) => {
                assert_eq!(e.kind(), std::io::ErrorKind::PermissionDenied);
            }
            other => panic!("Expected Io error for permission denied, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_load_large_file_warning() {
        // A file just over the warning threshold must still load successfully
        // (the warning itself is only logged).
        let temp_dir = TempDir::new().unwrap();
        let expected_len = LARGE_FILE_THRESHOLD + 1;
        let large_file = zero_filled_file(&temp_dir, "large.toml", expected_len);

        let uri = Uri::from_file_path(&large_file).unwrap();
        let loaded = load_document_from_disk(&uri).await.unwrap();

        assert_eq!(u64::try_from(loaded.len()).unwrap(), expected_len);
    }

    #[tokio::test]
    async fn test_load_cargo_toml() {
        let content = r#"[package]
name = "test"
version = "0.1.0"

[dependencies]
serde = "1.0"
"#;
        let temp_file = temp_file_with(content.as_bytes());

        let uri = Uri::from_file_path(temp_file.path()).unwrap();
        let loaded = load_document_from_disk(&uri).await.unwrap();

        assert_eq!(loaded, content);
        assert!(loaded.contains("[dependencies]"));
    }

    #[test]
    fn test_file_size_limit_constant() {
        // Document the limits for maintainability
        assert_eq!(MAX_FILE_SIZE, 10_000_000);
        assert_eq!(LARGE_FILE_THRESHOLD, 1_000_000);
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_load_symlink_to_valid_file() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let target = temp_dir.path().join("target.toml");
        let link = temp_dir.path().join("link.toml");

        std::fs::write(&target, "[dependencies]").unwrap();
        symlink(&target, &link).unwrap();

        let uri = Uri::from_file_path(&link).unwrap();
        let content = load_document_from_disk(&uri).await.unwrap();
        assert_eq!(content, "[dependencies]");
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_load_circular_symlink() {
        use std::os::unix::fs::symlink;

        let temp_dir = TempDir::new().unwrap();
        let link1 = temp_dir.path().join("link1.toml");
        let link2 = temp_dir.path().join("link2.toml");

        symlink(&link2, &link1).unwrap();
        symlink(&link1, &link2).unwrap();

        let uri = Uri::from_file_path(&link1).unwrap();
        let result = load_document_from_disk(&uri).await;
        assert!(
            matches!(result, Err(DepsError::Io(_))),
            "Circular symlink should fail with an Io error, got {result:?}"
        );
    }

    #[tokio::test]
    async fn test_load_file_at_max_size_is_accepted() {
        // Boundary: the limit is exclusive, so a file of exactly MAX_FILE_SIZE loads
        let temp_dir = TempDir::new().unwrap();
        let at_limit = zero_filled_file(&temp_dir, "at_limit.toml", MAX_FILE_SIZE);

        let uri = Uri::from_file_path(&at_limit).unwrap();
        let loaded = load_document_from_disk(&uri).await.unwrap();

        assert_eq!(u64::try_from(loaded.len()).unwrap(), MAX_FILE_SIZE);
    }

    #[tokio::test]
    async fn test_load_file_exceeding_max_size() {
        // One byte over the limit; `set_len` avoids writing the data by hand
        let temp_dir = TempDir::new().unwrap();
        let actual_size = MAX_FILE_SIZE + 1;
        let large_file = zero_filled_file(&temp_dir, "large.toml", actual_size);

        let uri = Uri::from_file_path(&large_file).unwrap();
        let result = load_document_from_disk(&uri).await;

        match result {
            Err(DepsError::CacheError(msg)) => {
                assert!(
                    msg.contains("file too large"),
                    "Error message should indicate file size issue: {msg}"
                );
                assert!(
                    msg.contains(&actual_size.to_string()),
                    "Error should mention actual file size: {msg}"
                );
            }
            other => panic!("Expected CacheError for oversized file, got {other:?}"),
        }
    }
}