changes in response to PR review

This commit is contained in:
Mick Grove 2026-04-08 13:14:39 -07:00
commit a0934737dc
3 changed files with 70 additions and 17 deletions

View file

@ -1,5 +1,5 @@
use anyhow::Result;
use tl::ParserOptions;
use tl::{HTMLTag, Node, Parser, ParserOptions};
use super::{css, lexer, Language};
@ -35,25 +35,27 @@ where
}
}
let inner_text = tag.inner_text(parser).trim().to_string();
match normalized_tag_name.as_str() {
"script" => {
let candidate = format!("<script> = {inner_text}");
if !inner_text.is_empty() && !sink(&candidate) {
return Ok(());
let script_text = tag.inner_text(parser);
let script_text = script_text.trim();
if !script_text.is_empty() {
lexer::stream_context_candidates(
script_text.as_bytes(),
&Language::JavaScript,
sink,
)?;
}
lexer::stream_context_candidates(
inner_text.as_bytes(),
&Language::JavaScript,
sink,
)?;
}
"style" => {
if !inner_text.is_empty() {
css::stream_context_candidates(inner_text.as_bytes(), sink)?;
let style_text = tag.inner_text(parser);
let style_text = style_text.trim();
if !style_text.is_empty() {
css::stream_context_candidates(style_text.as_bytes(), sink)?;
}
}
_ => {
let inner_text = text_without_embedded_code(tag, parser);
if !inner_text.is_empty() && !sink(&format!("{tag_name} = {inner_text}")) {
return Ok(());
}
@ -63,3 +65,31 @@ where
Ok(())
}
/// Returns the text gathered from `tag` by [`collect_visible_text`], with
/// leading and trailing whitespace removed.
///
/// The result is an owned `String`; it is empty when nothing but whitespace
/// (or nothing at all) was collected.
fn text_without_embedded_code(tag: &HTMLTag<'_>, parser: &Parser<'_>) -> String {
    let mut buffer = String::new();
    collect_visible_text(tag, parser, &mut buffer);
    // Trim only at the very end so whitespace between collected fragments is kept.
    let trimmed = buffer.trim();
    trimmed.to_owned()
}
/// Appends the raw text found beneath `tag` to `out`, walking the subtree
/// depth-first in document order.
///
/// * Raw text nodes are appended verbatim (no separator is inserted between
///   fragments).
/// * Comment nodes contribute nothing.
/// * Child tags are descended into recursively, except `script` and `style`
///   (compared ASCII case-insensitively), whose whole subtree is skipped.
/// * Child handles that do not resolve to a node in `parser` are ignored.
fn collect_visible_text(tag: &HTMLTag<'_>, parser: &Parser<'_>, out: &mut String) {
    let children = tag.children();
    let resolved_nodes = children
        .top()
        .iter()
        .filter_map(|handle| handle.get(parser));

    for node in resolved_nodes {
        match node {
            Node::Tag(child) => {
                let name = child.name().as_utf8_str();
                let is_embedded_code = ["script", "style"]
                    .iter()
                    .any(|skipped| name.eq_ignore_ascii_case(skipped));
                // Embedded code is handled by dedicated lexers elsewhere, so it
                // must not leak into the surrounding element's text.
                if !is_embedded_code {
                    collect_visible_text(child, parser, out);
                }
            }
            Node::Raw(raw) => out.push_str(&raw.as_utf8_str()),
            Node::Comment(_) => {}
        }
    }
}