changes in response to PR review

This commit is contained in:
Mick Grove 2026-04-08 13:14:39 -07:00
commit a0934737dc
3 changed files with 70 additions and 17 deletions

View file

@ -251,7 +251,7 @@ mod tests {
.unwrap();
assert!(
texts.iter().any(|text| text.contains("<script> = const auth0_client_secret")),
texts.iter().any(|text| text.contains("auth0_client_secret = secret-value")),
"expected uppercase script tag to be handled like lowercase script"
);
assert!(
@ -260,6 +260,33 @@ mod tests {
);
}
/// A `<script>` element whose body is only a JavaScript line comment must not
/// surface the commented-out key as a candidate, while text in ordinary
/// elements of the same document is still reported.
#[test]
fn html_comment_only_script_context_is_ignored() {
    // The key below sits behind `//` inside the script element.
    let commented_key = "AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM";
    let html = br#"
<html>
<body>
<script>// AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM</script>
<div>visible text</div>
</body>
</html>
"#;

    // Record every candidate handed to the sink; returning `true` keeps the
    // stream going so the whole document is visited.
    let mut candidates = Vec::new();
    stream_context_candidates(html, &Language::Html, |text| {
        candidates.push(text.to_string());
        true
    })
    .unwrap();

    let key_never_reported = candidates
        .iter()
        .all(|candidate| !candidate.contains(commented_key));
    assert!(
        key_never_reported,
        "expected commented-out script secrets to stay ignored"
    );

    let div_text_reported = candidates
        .iter()
        .any(|candidate| candidate.contains("div = visible text"));
    assert!(
        div_text_reported,
        "expected visible non-script HTML text to remain available for verification"
    );
}
#[test]
fn comment_only_python_context_is_ignored() {
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));

View file

@ -1,5 +1,5 @@
use anyhow::Result;
use tl::ParserOptions;
use tl::{HTMLTag, Node, Parser, ParserOptions};
use super::{css, lexer, Language};
@ -35,25 +35,27 @@ where
}
}
let inner_text = tag.inner_text(parser).trim().to_string();
match normalized_tag_name.as_str() {
"script" => {
let candidate = format!("<script> = {inner_text}");
if !inner_text.is_empty() && !sink(&candidate) {
return Ok(());
let script_text = tag.inner_text(parser);
let script_text = script_text.trim();
if !script_text.is_empty() {
lexer::stream_context_candidates(
script_text.as_bytes(),
&Language::JavaScript,
sink,
)?;
}
lexer::stream_context_candidates(
inner_text.as_bytes(),
&Language::JavaScript,
sink,
)?;
}
"style" => {
if !inner_text.is_empty() {
css::stream_context_candidates(inner_text.as_bytes(), sink)?;
let style_text = tag.inner_text(parser);
let style_text = style_text.trim();
if !style_text.is_empty() {
css::stream_context_candidates(style_text.as_bytes(), sink)?;
}
}
_ => {
let inner_text = text_without_embedded_code(tag, parser);
if !inner_text.is_empty() && !sink(&format!("{tag_name} = {inner_text}")) {
return Ok(());
}
@ -63,3 +65,31 @@ where
Ok(())
}
/// Returns the text gathered by [`collect_visible_text`] for `tag`, with
/// leading and trailing whitespace removed.
///
/// The result is empty when nothing but whitespace was collected.
fn text_without_embedded_code(tag: &HTMLTag<'_>, parser: &Parser<'_>) -> String {
    let mut visible = String::new();
    collect_visible_text(tag, parser, &mut visible);
    // Trim only the outer edges; interior whitespace is kept as collected.
    String::from(visible.trim())
}
/// Appends the raw text found beneath `tag` to `out`, in child order.
///
/// Walks the direct children of `tag` and recurses into child tags, except
/// that `script` and `style` elements (matched ASCII case-insensitively) are
/// not descended into. Comment nodes contribute nothing, and child handles
/// that do not resolve through `parser` are skipped.
fn collect_visible_text(tag: &HTMLTag<'_>, parser: &Parser<'_>, out: &mut String) {
    // Elements whose contents are code rather than visible text.
    const EMBEDDED_CODE_TAGS: [&str; 2] = ["script", "style"];

    for node in tag
        .children()
        .top()
        .iter()
        .filter_map(|handle| handle.get(parser))
    {
        match node {
            Node::Raw(bytes) => out.push_str(&bytes.as_utf8_str()),
            Node::Tag(child) => {
                let name = child.name().as_utf8_str();
                let holds_code = EMBEDDED_CODE_TAGS
                    .iter()
                    .any(|code_tag| name.eq_ignore_ascii_case(code_tag));
                if !holds_code {
                    collect_visible_text(child, parser, out);
                }
            }
            Node::Comment(_) => {}
        }
    }
}

View file

@ -135,11 +135,7 @@
"Println = >>done<<"
],
"html:testdata/html_embedded_vulnerable.html": [
"html = .auth0_client_secret {\n content: \"abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234\";\n }\n \n \n \n \n const auth0_client_secret = \"abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234\";\n const password = \"superSecret123\";",
"head = .auth0_client_secret {\n content: \"abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234\";\n }",
"content = abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234",
"body = const auth0_client_secret = \"abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234\";\n const password = \"superSecret123\";",
"<script> = const auth0_client_secret = \"abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234\";\n const password = \"superSecret123\";",
"auth0_client_secret = abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234",
"password = superSecret123"
],