From a0934737dc063feb9c2b2cd8c816b1153ea1b5ea Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 8 Apr 2026 13:14:39 -0700 Subject: [PATCH] changes in response to PR review --- src/parser.rs | 29 +++++++++- src/parser/html.rs | 54 ++++++++++++++----- testdata/parsers/context_verifier_golden.json | 4 -- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index ee4b20f..a04c56e 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -251,7 +251,7 @@ mod tests { .unwrap(); assert!( - texts.iter().any(|text| text.contains(" +
visible text
+ + + "#; + let mut texts = Vec::new(); + stream_context_candidates(source, &Language::Html, |text| { + texts.push(text.to_string()); + true + }) + .unwrap(); + + assert!( + !texts.iter().any(|text| text.contains("AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM")), + "expected commented-out script secrets to stay ignored" + ); + assert!( + texts.iter().any(|text| text.contains("div = visible text")), + "expected visible non-script HTML text to remain available for verification" + ); + } + #[test] fn comment_only_python_context_is_ignored() { let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); diff --git a/src/parser/html.rs b/src/parser/html.rs index f10840b..f0ad094 100644 --- a/src/parser/html.rs +++ b/src/parser/html.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use tl::ParserOptions; +use tl::{HTMLTag, Node, Parser, ParserOptions}; use super::{css, lexer, Language}; @@ -35,25 +35,27 @@ where } } - let inner_text = tag.inner_text(parser).trim().to_string(); match normalized_tag_name.as_str() { "script" => { - let candidate = format!("