Skip to content

Commit e441e1e

Browse files
authored
fix: improve handling of URLs in file functions. (#369)
1 parent f4c832c commit e441e1e

35 files changed

+647
-195
lines changed

.github/workflows/CI.yml

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ jobs:
5454
~/.cargo/git/db/
5555
target/
5656
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
57+
- run: docker pull ubuntu:latest
58+
if: runner.os == 'Linux'
5759
- run: cargo test --all --all-features
5860
- run: cargo test --all-features --examples
5961

wdl-engine/CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323

2424
### Fixed
2525

26+
* Fixed support for URLs in file stdlib functions (#[369](https://github.com/stjude-rust-labs/wdl/pull/369)).
2627
* Fixed panic when an input path in a complex type did not exist (#[327](https://github.com/stjude-rust-labs/wdl/pull/327)).
2728
* Fixed path translation in nested placeholder evaluation (#[327](https://github.com/stjude-rust-labs/wdl/pull/327)).
2829
* Fixed path translation to mount inputs individually (#[327](https://github.com/stjude-rust-labs/wdl/pull/327)).

wdl-engine/src/diagnostics.rs

-38
Original file line numberDiff line numberDiff line change
@@ -161,44 +161,6 @@ pub fn multiline_string_requirement(span: Span) -> Diagnostic {
161161
Diagnostic::error("use of multi-line strings requires WDL version 1.2").with_highlight(span)
162162
}
163163

164-
/// Creates an "invalid regular expression" diagnostic.
165-
pub fn invalid_regex(error: &regex::Error, span: Span) -> Diagnostic {
166-
Diagnostic::error(error.to_string()).with_highlight(span)
167-
}
168-
169-
/// Creates a "path not relative" diagnostic.
170-
pub fn path_not_relative(span: Span) -> Diagnostic {
171-
Diagnostic::error("path is required to be a relative path, but an absolute path was provided")
172-
.with_highlight(span)
173-
}
174-
175-
/// Creates an "array path not relative" diagnostic.
176-
pub fn array_path_not_relative(index: usize, span: Span) -> Diagnostic {
177-
Diagnostic::error(format!(
178-
"index {index} of the array is required to be a relative path, but an absolute path was \
179-
provided"
180-
))
181-
.with_highlight(span)
182-
}
183-
184-
/// Creates an "invalid glob pattern" diagnostic.
185-
pub fn invalid_glob_pattern(error: &glob::PatternError, span: Span) -> Diagnostic {
186-
Diagnostic::error(format!(
187-
"invalid glob pattern specified: {error}",
188-
error = error.msg
189-
))
190-
.with_highlight(span)
191-
}
192-
193-
/// Creates an "invalid storage unit" diagnostic.
194-
pub fn invalid_storage_unit(unit: &str, span: Span) -> Diagnostic {
195-
Diagnostic::error(format!(
196-
"invalid storage unit `{unit}`; supported units are `B`, `KB`, `K`, `MB`, `M`, `GB`, `G`, \
197-
`TB`, `T`, `KiB`, `Ki`, `MiB`, `Mi`, `GiB`, `Gi`, `TiB`, and `Ti`",
198-
))
199-
.with_highlight(span)
200-
}
201-
202164
/// Creates a "function call failed" diagnostic.
203165
pub fn function_call_failed(name: &str, error: impl fmt::Display, span: Span) -> Diagnostic {
204166
Diagnostic::error(format!("call to function `{name}` failed: {error}")).with_highlight(span)

wdl-engine/src/stdlib/as_map.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ use crate::PrimitiveValue;
1414
use crate::Value;
1515
use crate::diagnostics::function_call_failed;
1616

17+
/// The name of the function defined in this file for use in diagnostics.
18+
const FUNCTION_NAME: &str = "as_map";
19+
1720
/// Used for displaying duplicate key errors.
1821
struct DuplicateKeyError(Option<PrimitiveValue>);
1922

@@ -62,7 +65,7 @@ fn as_map(context: CallContext<'_>) -> Result<Value, Diagnostic> {
6265

6366
if elements.insert(key.clone(), pair.right().clone()).is_some() {
6467
return Err(function_call_failed(
65-
"as_map",
68+
FUNCTION_NAME,
6669
DuplicateKeyError(key),
6770
context.arguments[0].span,
6871
));

wdl-engine/src/stdlib/basename.rs

+73-17
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use std::path::Path;
44

5+
use url::Url;
56
use wdl_analysis::types::PrimitiveType;
67
use wdl_ast::Diagnostic;
78

@@ -21,32 +22,45 @@ use crate::Value;
2122
///
2223
/// https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#basename
2324
fn basename(context: CallContext<'_>) -> Result<Value, Diagnostic> {
25+
fn remove_suffix<'a>(context: CallContext<'_>, base: &'a str) -> &'a str {
26+
if context.arguments.len() == 2 {
27+
base.strip_suffix(
28+
context
29+
.coerce_argument(1, PrimitiveType::String)
30+
.unwrap_string()
31+
.as_str(),
32+
)
33+
.unwrap_or(base)
34+
} else {
35+
base
36+
}
37+
}
38+
2439
debug_assert!(!context.arguments.is_empty() && context.arguments.len() < 3);
2540
debug_assert!(context.return_type_eq(PrimitiveType::String));
2641

2742
let path = context
2843
.coerce_argument(0, PrimitiveType::String)
2944
.unwrap_string();
3045

31-
match Path::new(path.as_str()).file_name() {
32-
Some(base) => {
33-
let base = base.to_str().expect("should be UTF-8");
34-
let base = if context.arguments.len() == 2 {
35-
base.strip_suffix(
36-
context
37-
.coerce_argument(1, PrimitiveType::String)
38-
.unwrap_string()
39-
.as_str(),
40-
)
41-
.unwrap_or(base)
42-
} else {
43-
base
44-
};
45-
46-
Ok(PrimitiveValue::new_string(base).into())
46+
// Do not attempt to parse absolute Windows paths (and by extension, we do not
47+
// support single-character schemed URLs)
48+
if path.get(1..2) != Some(":") {
49+
if let Ok(url) = path.parse::<Url>() {
50+
let base = url
51+
.path_segments()
52+
.and_then(|mut segments| segments.next_back())
53+
.unwrap_or("");
54+
55+
return Ok(PrimitiveValue::new_string(remove_suffix(context, base)).into());
4756
}
48-
None => Ok(PrimitiveValue::String(path).into()),
4957
}
58+
59+
let base = Path::new(path.as_str())
60+
.file_name()
61+
.map(|f| f.to_str().expect("should be UTF-8"))
62+
.unwrap_or("");
63+
Ok(PrimitiveValue::new_string(remove_suffix(context, base)).into())
5064
}
5165

5266
/// Gets the function describing `basename`.
@@ -97,5 +111,47 @@ mod test {
97111
.await
98112
.unwrap();
99113
assert_eq!(value.unwrap_string().as_str(), "file");
114+
115+
let value = eval_v1_expr(&env, V1::Two, "basename('file.txt', '.jpg')")
116+
.await
117+
.unwrap();
118+
assert_eq!(value.unwrap_string().as_str(), "file.txt");
119+
120+
let value = eval_v1_expr(&env, V1::Two, "basename('https://example.com')")
121+
.await
122+
.unwrap();
123+
assert_eq!(value.unwrap_string().as_str(), "");
124+
125+
let value = eval_v1_expr(&env, V1::Two, "basename('https://example.com/foo')")
126+
.await
127+
.unwrap();
128+
assert_eq!(value.unwrap_string().as_str(), "foo");
129+
130+
let value = eval_v1_expr(
131+
&env,
132+
V1::Two,
133+
"basename('https://example.com/foo/bar/baz.txt')",
134+
)
135+
.await
136+
.unwrap();
137+
assert_eq!(value.unwrap_string().as_str(), "baz.txt");
138+
139+
let value = eval_v1_expr(
140+
&env,
141+
V1::Two,
142+
"basename('https://example.com/foo/bar/baz.txt?foo=baz', '.txt')",
143+
)
144+
.await
145+
.unwrap();
146+
assert_eq!(value.unwrap_string().as_str(), "baz");
147+
148+
let value = eval_v1_expr(
149+
&env,
150+
V1::Two,
151+
"basename('https://example.com/foo/bar/baz.txt#hmm', '.jpg')",
152+
)
153+
.await
154+
.unwrap();
155+
assert_eq!(value.unwrap_string().as_str(), "baz.txt");
100156
}
101157
}

wdl-engine/src/stdlib/chunk.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ use crate::Array;
1010
use crate::Value;
1111
use crate::diagnostics::function_call_failed;
1212

13+
/// The name of the function defined in this file for use in diagnostics.
14+
const FUNCTION_NAME: &str = "chunk";
15+
1316
/// Given an array and a length `n`, splits the array into consecutive,
1417
/// non-overlapping arrays of n elements.
1518
///
@@ -32,7 +35,7 @@ fn chunk(context: CallContext<'_>) -> Result<Value, Diagnostic> {
3235

3336
if size < 0 {
3437
return Err(function_call_failed(
35-
"chunk",
38+
FUNCTION_NAME,
3639
"chunk size cannot be negative",
3740
context.arguments[1].span,
3841
));

wdl-engine/src/stdlib/find.rs

+8-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ use super::Function;
1212
use super::Signature;
1313
use crate::PrimitiveValue;
1414
use crate::Value;
15-
use crate::diagnostics::invalid_regex;
15+
use crate::diagnostics::function_call_failed;
16+
17+
/// The name of the function defined in this file for use in diagnostics.
18+
const FUNCTION_NAME: &str = "find";
1619

1720
/// Given two String parameters `input` and `pattern`, searches for the
1821
/// occurrence of `pattern` within `input` and returns the first match or `None`
@@ -30,8 +33,8 @@ fn find(context: CallContext<'_>) -> Result<Value, Diagnostic> {
3033
.coerce_argument(1, PrimitiveType::String)
3134
.unwrap_string();
3235

33-
let regex =
34-
Regex::new(pattern.as_str()).map_err(|e| invalid_regex(&e, context.arguments[1].span))?;
36+
let regex = Regex::new(pattern.as_str())
37+
.map_err(|e| function_call_failed(FUNCTION_NAME, &e, context.arguments[1].span))?;
3538

3639
match regex.find(input.as_str()) {
3740
Some(m) => Ok(PrimitiveValue::new_string(m.as_str()).into()),
@@ -67,7 +70,8 @@ mod test {
6770
.unwrap_err();
6871
assert_eq!(
6972
diagnostic.message(),
70-
"regex parse error:\n ?\n ^\nerror: repetition operator missing expression"
73+
"call to function `find` failed: regex parse error:\n ?\n ^\nerror: repetition \
74+
operator missing expression"
7175
);
7276

7377
let value = eval_v1_expr(&env, V1::Two, "find('hello world', 'e..o')")

0 commit comments

Comments
 (0)