diff options
author | Opal <847966@proton.me> | 2022-10-15 15:27:41 -0700 |
---|---|---|
committer | Opal <847966@proton.me> | 2022-10-15 15:27:41 -0700 |
commit | a287e741842f67d0a04c48276221d85f16079d55 (patch) | |
tree | 2db596b4ba6709fe10168942bcd0fc5ed2850d02 /wiki/templates/Scrubber.pm | |
parent | d86946ec21f2175d3a5aad58c1ae236291c74b7a (diff) | |
download | emacsconf-ansible-a287e741842f67d0a04c48276221d85f16079d55.tar.xz emacsconf-ansible-a287e741842f67d0a04c48276221d85f16079d55.zip |
merging code between old emacsconf repo, to sachac's emacsconf repo
Diffstat (limited to 'wiki/templates/Scrubber.pm')
-rw-r--r-- | wiki/templates/Scrubber.pm | 749 |
1 files changed, 0 insertions, 749 deletions
diff --git a/wiki/templates/Scrubber.pm b/wiki/templates/Scrubber.pm deleted file mode 100644 index 2efaa10..0000000 --- a/wiki/templates/Scrubber.pm +++ /dev/null @@ -1,749 +0,0 @@ -package HTML::Scrubber; - -# ABSTRACT: Perl extension for scrubbing/sanitizing html - - -use 5.008; # enforce minimum perl version of 5.8 -use strict; -use warnings; -use HTML::Parser 3.47 (); -use HTML::Entities; -use Scalar::Util ('weaken'); - -our ( @_scrub, @_scrub_fh ); - -our $VERSION = '0.15'; # VERSION -our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY - -# my my my my, these here to prevent foolishness like -# http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals -(@_scrub) = ( \&_scrub, "self, event, tagname, attr, attrseq, text" ); -(@_scrub_fh) = ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text" ); - -sub new { - my $package = shift; - my $p = HTML::Parser->new( - api_version => 3, - default_h => \@_scrub, - marked_sections => 0, - strict_comment => 0, - unbroken_text => 1, - case_sensitive => 0, - boolean_attribute_value => undef, - empty_element_tags => 1, - ); - - my $self = { - _p => $p, - _rules => { '*' => 0, }, - _comment => 0, - _process => 0, - _r => "", - _optimize => 1, - _script => 0, - _style => 0, - }; - - $p->{"\0_s"} = bless $self, $package; - weaken( $p->{"\0_s"} ); - - return $self unless @_; - - my (%args) = @_; - - for my $f (qw[ default allow deny rules process comment ]) { - next unless exists $args{$f}; - if ( ref $args{$f} ) { - $self->$f( @{ $args{$f} } ); - } - else { - $self->$f( $args{$f} ); - } - } - - return $self; -} - - -sub comment { - return $_[0]->{_comment} - if @_ == 1; - $_[0]->{_comment} = $_[1]; - return; -} - - -sub process { - return $_[0]->{_process} - if @_ == 1; - $_[0]->{_process} = $_[1]; - return; -} - - -sub script { - return $_[0]->{_script} - if @_ == 1; - $_[0]->{_script} = $_[1]; - return; -} - - -sub style { - return $_[0]->{_style} - if @_ == 1; - $_[0]->{_style} = $_[1]; - return; -} - - -sub allow { - my $self = shift; - for my $k (@_) { - $self->{_rules}{ lc $k } = 1; - } - $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - - -sub deny { - my $self = shift; - - for my $k (@_) { - $self->{_rules}{ lc $k } = 0; - } - - $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - - -sub rules { - my $self = shift; - my (%rules) = @_; - for my $k ( keys %rules ) { - $self->{_rules}{ lc $k } = $rules{$k}; - } - - $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - - -sub default { - return $_[0]->{_rules}{'*'} - if @_ == 1; - - $_[0]->{_rules}{'*'} = $_[1] if defined $_[1]; - $_[0]->{_rules}{'_'} = $_[2] if defined $_[2] and ref $_[2]; - $_[0]->{_optimize} = 1; # each time a rule changes, reoptimize when parse - - return; -} - - -sub scrub_file { - if ( @_ > 2 ) { - return unless defined $_[0]->_out( $_[2] ); - } - else { - $_[0]->{_p}->handler( default => @_scrub ); - } - - $_[0]->_optimize(); #if $_[0]->{_optimize}; - - $_[0]->{_p}->parse_file( $_[1] ); - - return delete $_[0]->{_r} unless exists $_[0]->{_out}; - print { $_[0]->{_out} } $_[0]->{_r} if length $_[0]->{_r}; - delete $_[0]->{_out}; - return 1; -} - - -sub scrub { - if ( @_ > 2 ) { - return unless defined $_[0]->_out( $_[2] ); - } - else { - $_[0]->{_p}->handler( default => @_scrub ); - } - - $_[0]->_optimize(); # if $_[0]->{_optimize}; - - $_[0]->{_p}->parse( $_[1] ) if defined( $_[1] ); - $_[0]->{_p}->eof(); - - return delete $_[0]->{_r} unless exists $_[0]->{_out}; - delete $_[0]->{_out}; - return 1; -} - - -sub _out { - my ( $self, $o ) = @_; - - unless ( ref $o and ref \$o ne 'GLOB' ) { - open my $F, '>', $o or return; - binmode $F; - $self->{_out} = $F; - } - else { - $self->{_out} = $o; - } - - $self->{_p}->handler( default => @_scrub_fh ); - - return 1; -} - - -sub _validate { - my ( $s, $t, $r, $a, $as ) = @_; - return "<$t>" unless %$a; - - $r = $s->{_rules}->{$r}; - my %f; - - for my $k ( keys %$a ) { - my $check = exists $r->{$k} ? $r->{$k} : exists $r->{'*'} ? $r->{'*'} : next; - - if ( ref $check eq 'CODE' ) { - my @v = $check->( $s, $t, $k, $a->{$k}, $a, \%f ); - next unless @v; - $f{$k} = shift @v; - } - elsif ( ref $check || length($check) > 1 ) { - $f{$k} = $a->{$k} if $a->{$k} =~ m{$check}; - } - elsif ($check) { - $f{$k} = $a->{$k}; - } - } - - if (%f) { - my %seen; - return "<$t $r>" - if $r = join ' ', map { - defined $f{$_} - ? qq[$_="] . encode_entities( $f{$_} ) . q["] - : $_; # boolean attribute (TODO?) - } grep { exists $f{$_} and !$seen{$_}++; } @$as; - } - - return "<$t>"; -} - - -sub _scrub_str { - my ( $p, $e, $t, $a, $as, $text ) = @_; - - my $s = $p->{"\0_s"}; - my $outstr = ''; - - if ( $e eq 'start' ) { - if ( exists $s->{_rules}->{$t} ) # is there a specific rule - { - if ( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;) - { - $outstr .= $s->_validate( $t, $t, $a, $as ); - } - elsif ( $s->{_rules}->{$t} ) # validate using default attribute rule - { - $outstr .= $s->_validate( $t, '_', $a, $as ); - } - } - elsif ( $s->{_rules}->{'*'} ) # default allow tags - { - $outstr .= $s->_validate( $t, '_', $a, $as ); - } - } - elsif ( $e eq 'end' ) { - my $place = 0; - if ( exists $s->{_rules}->{$t} ) { - $place = 1 if $s->{_rules}->{$t}; - } - elsif ( $s->{_rules}->{'*'} ) { - $place = 1; - } - if ($place) { - if ( length $text ) { - $outstr .= "</$t>"; - } - else { - substr $s->{_r}, -1, 0, ' /'; - } - } - } - elsif ( $e eq 'comment' ) { - if ( $s->{_comment} ) { - - # only copy comments through if they are well formed... - $outstr .= $text if ( $text =~ m|^<!--.*-->$|ms ); - } - } - elsif ( $e eq 'process' ) { - $outstr .= $text if $s->{_process}; - } - elsif ( $e eq 'text' or $e eq 'default' ) { - $text =~ s/</</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch - $text =~ s/>/>/g; - - $outstr .= $text; - } - elsif ( $e eq 'start_document' ) { - $outstr = ""; - } - - return $outstr; -} - - -sub _scrub_fh { - my $self = $_[0]->{"\0_s"}; - print { $self->{_out} } $self->{'_r'} if length $self->{_r}; - $self->{'_r'} = _scrub_str(@_); -} - - -sub _scrub { - - $_[0]->{"\0_s"}->{_r} .= _scrub_str(@_); -} - -sub _optimize { - my ($self) = @_; - - my (@ignore_elements) = grep { not $self->{"_$_"} } qw(script style); - $self->{_p}->ignore_elements(@ignore_elements); # if @ is empty, we reset ;) - - return unless $self->{_optimize}; - - #sub allow - # return unless $self->{_optimize}; # till I figure it out (huh) - - if ( $self->{_rules}{'*'} ) { # default allow - $self->{_p}->report_tags(); # so clear it - } - else { - - my (@reports) = - grep { # report only tags we want - $self->{_rules}{$_} - } keys %{ $self->{_rules} }; - - $self->{_p}->report_tags( # default deny, so optimize - @reports - ) if @reports; - } - - # sub deny - # return unless $self->{_optimize}; # till I figure it out (huh) - my (@ignores) = - grep { not $self->{_rules}{$_} } grep { $_ ne '*' } keys %{ $self->{_rules} }; - - $self->{_p}->ignore_tags( # always ignore stuff we don't want - @ignores - ) if @ignores; - - $self->{_optimize} = 0; - return; -} - -1; - -#print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl! -#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl -#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl -#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl - -__END__ - -=pod - -=for stopwords html cpan callback homepage Perlbrew perltidy respository - -=head1 NAME - -HTML::Scrubber - Perl extension for scrubbing/sanitizing html - -=head1 VERSION - -version 0.15 - -=head1 SYNOPSIS - - use HTML::Scrubber; - - my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] ); - print $scrubber->scrub('<p><b>bold</b> <em>missing</em></p>'); - # output is: <p><b>bold</b> </p> - - # more complex input - my $html = q[ - <style type="text/css"> BAD { background: #666; color: #666;} </style> - <script language="javascript"> alert("Hello, I am EVIL!"); </script> - <HR> - a => <a href=1>link </a> - br => <br> - b => <B> bold </B> - u => <U> UNDERLINE </U> - ]; - - print $scrubber->scrub($html); - - $scrubber->deny( qw[ p b i u hr br ] ); - - print $scrubber->scrub($html); - -=head1 DESCRIPTION - -If you want to "scrub" or "sanitize" html input in a reliable and flexible -fashion, then this module is for you. - -I wasn't satisfied with HTML::Sanitizer because it is based on -HTML::TreeBuilder, so I thought I'd write something similar that works directly -with HTML::Parser. - -=head1 METHODS - -First a note on documentation: just study the L<EXAMPLE|"EXAMPLE"> below. It's -all the documentation you could need - -Also, be sure to read all the comments as well as L<How does it work?|"How does -it work?">. - -If you're new to perl, good luck to you. - -=head2 comment - - warn "comments are ", $p->comment ? 'allowed' : 'not allowed'; - $p->comment(0); # off by default - -=head2 process - - warn "process instructions are ", $p->process ? 'allowed' : 'not allowed'; - $p->process(0); # off by default - -=head2 script - - warn "script tags (and everything in between) are supressed" - if $p->script; # off by default - $p->script( 0 || 1 ); - -B<**> Please note that this is implemented using HTML::Parser's ignore_elements -function, so if C<script> is set to true, all script tags encountered will be -validated like all other tags. - -=head2 style - - warn "style tags (and everything in between) are supressed" - if $p->style; # off by default - $p->style( 0 || 1 ); - -B<**> Please note that this is implemented using HTML::Parser's ignore_elements -function, so if C<style> is set to true, all style tags encountered will be -validated like all other tags. - -=head2 allow - - $p->allow(qw[ t a g s ]); - -=head2 deny - - $p->deny(qw[ t a g s ]); - -=head2 rules - - $p->rules( - img => { - src => qr{^(?!http://)}i, # only relative image links allowed - alt => 1, # alt attribute allowed - '*' => 0, # deny all other attributes - }, - a => { - href => sub { ... }, # check or adjust with a callback - }, - b => 1, - ... - ); - -Updates set of attribute rules. Each rule can be 1/0, regular expression or a -callback. Values longer than 1 char are treated as regexps. Callback is called -with the following arguments: this object, tag name, attribute name and -attribute value, should return empty list to drop attribute, C<undef> to keep -it without value or a new scalar value. - -=head2 default - - print "default is ", $p->default(); - $p->default(1); # allow tags by default - $p->default( - undef, # don't change - { # default attribute rules - '*' => 1, # allow attributes by default - } - ); - -=head2 scrub_file - - $html = $scrubber->scrub_file('foo.html'); ## returns giant string - die "Eeek $!" unless defined $html; ## opening foo.html may have failed - $scrubber->scrub_file('foo.html', 'new.html') or die "Eeek $!"; - $scrubber->scrub_file('foo.html', *STDOUT) - or die "Eeek $!" - if fileno STDOUT; - -=head2 scrub - - print $scrubber->scrub($html); ## returns giant string - $scrubber->scrub($html, 'new.html') or die "Eeek $!"; - $scrubber->scrub($html', *STDOUT) - or die "Eeek $!" - if fileno STDOUT; - -=for comment _out - $scrubber->_out(*STDOUT) if fileno STDOUT; - $scrubber->_out('foo.html') or die "eeek $!"; - -=for comment _validate -Uses $self->{_rules} to do attribute validation. -Takes tag, rule('_' || $tag), attrref. - -=for comment _scrub_str - -I<default> handler, used by both _scrub and _scrub_fh Moved all the common code -(basically all of it) into a single routine for ease of maintenance - -=for comment _scrub_fh - -I<default> handler, does the scrubbing if we're scrubbing out to a file. Now -calls _scrub_str and pushes that out to a file. - -=for comment _scrub - -I<default> handler, does the scrubbing if we're returning a giant string. Now -calls _scrub_str and appends that to the output string. - -=head1 How does it work? - -When a tag is encountered, HTML::Scrubber allows/denies the tag using the -explicit rule if one exists. - -If no explicit rule exists, Scrubber applies the default rule. - -If an explicit rule exists, but it's a simple rule(1), the default attribute -rule is applied. - -=head2 EXAMPLE - -=for example begin - - #!/usr/bin/perl -w - use HTML::Scrubber; - use strict; - - my @allow = qw[ br hr b a ]; - - my @rules = ( - script => 0, - img => { - src => qr{^(?!http://)}i, # only relative image links allowed - alt => 1, # alt attribute allowed - '*' => 0, # deny all other attributes - }, - ); - - my @default = ( - 0 => # default rule, deny all tags - { - '*' => 1, # default rule, allow all attributes - 'href' => qr{^(?:http|https|ftp)://}i, - 'src' => qr{^(?:http|https|ftp)://}i, - - # If your perl doesn't have qr - # just use a string with length greater than 1 - 'cite' => '(?i-xsm:^(?:http|https|ftp):)', - 'language' => 0, - 'name' => 1, # could be sneaky, but hey ;) - 'onblur' => 0, - 'onchange' => 0, - 'onclick' => 0, - 'ondblclick' => 0, - 'onerror' => 0, - 'onfocus' => 0, - 'onkeydown' => 0, - 'onkeypress' => 0, - 'onkeyup' => 0, - 'onload' => 0, - 'onmousedown' => 0, - 'onmousemove' => 0, - 'onmouseout' => 0, - 'onmouseover' => 0, - 'onmouseup' => 0, - 'onreset' => 0, - 'onselect' => 0, - 'onsubmit' => 0, - 'onunload' => 0, - 'src' => 0, - 'type' => 0, - } - ); - - my $scrubber = HTML::Scrubber->new(); - $scrubber->allow(@allow); - $scrubber->rules(@rules); # key/value pairs - $scrubber->default(@default); - $scrubber->comment(1); # 1 allow, 0 deny - - ## preferred way to create the same object - $scrubber = HTML::Scrubber->new( - allow => \@allow, - rules => \@rules, - default => \@default, - comment => 1, - process => 0, - ); - - require Data::Dumper, die Data::Dumper::Dumper($scrubber) if @ARGV; - - my $it = q[ - <?php echo(" EVIL EVIL EVIL "); ?> <!-- asdf --> - <hr> - <I FAKE="attribute" > IN ITALICS WITH FAKE="attribute" </I><br> - <B> IN BOLD </B><br> - <A NAME="evil"> - <A HREF="javascript:alert('die die die');">HREF=JAVA <!></A> - <br> - <A HREF="image/bigone.jpg" ONMOUSEOVER="alert('die die die');"> - <IMG SRC="image/smallone.jpg" ALT="ONMOUSEOVER JAVASCRIPT"> - </A> - </A> <br> - ]; - - print "#original text", $/, $it, $/; - print - "#scrubbed text (default ", $scrubber->default(), # no arguments returns the current value - " comment ", $scrubber->comment(), " process ", $scrubber->process(), " )", $/, $scrubber->scrub($it), $/; - - $scrubber->default(1); # allow all tags by default - $scrubber->comment(0); # deny comments - - print - "#scrubbed text (default ", - $scrubber->default(), - " comment ", - $scrubber->comment(), - " process ", - $scrubber->process(), - " )", $/, - $scrubber->scrub($it), - $/; - - $scrubber->process(1); # allow process instructions (dangerous) - $default[0] = 1; # allow all tags by default - $default[1]->{'*'} = 0; # deny all attributes by default - $scrubber->default(@default); # set the default again - - print - "#scrubbed text (default ", - $scrubber->default(), - " comment ", - $scrubber->comment(), - " process ", - $scrubber->process(), - " )", $/, - $scrubber->scrub($it), - $/; - -=for example end - -=head2 FUN - -If you have Test::Inline (and you've installed HTML::Scrubber), try - - pod2test Scrubber.pm >scrubber.t - perl scrubber.t - -=head1 SEE ALSO - -L<HTML::Parser>, L<Test::Inline>. - -The C<HTML::Sanitizer> module is no longer available on CPAN. - -=head1 VERSION REQUIREMENTS - -As of version 0.14 I have added a perl minimum version requirement of 5.8. This -is basically due to failures on the smokers perl 5.6 installations - which -appears to be down to installation mechanisms and requirements. - -Since I don't want to spend the time supporting a version that is so old (and -may not work for reasons on UTF support etc), I have added a C<use 5.008;> to -the main module. - -If this is problematic I am very willing to accept patches to fix this up, -although I do not personally see a good reason to support a release that has -been obsolete for 13 years. - -=head1 CONTRIBUTING - -If you want to contribute to the development of this module, the code is on -L<GitHub|http://github.com/nigelm/html-scrubber>. You'll need a perl -environment with L<Dist::Zilla>, and if you're just getting started, there's -some documentation on using Vagrant and Perlbrew -L<here|http://mrcaron.github.io/2015/03/06/Perl-CPAN-Pull-Request.html>. - -There is now a C<.perltidyrc> and a <.tidyallrc> file within the respository -for the standard perltidy settings used - I will apply these before new -releases. Please do not let formatting prevent you from sending in patches etc -- this can be sorted out as part of the release process. Info on C<tidyall> -can be found at -L<https://metacpan.org/pod/distribution/Code-TidyAll/bin/tidyall>. - -=head1 INSTALLATION - -See perlmodinstall for information and options on installing Perl modules. - -=head1 BUGS AND LIMITATIONS - -You can make new bug reports, and view existing ones, through the -web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Scrubber>. - -=head1 AVAILABILITY - -The project homepage is L<https://metacpan.org/release/HTML-Scrubber>. - -The latest version of this module is available from the Comprehensive Perl -Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN -site near you, or see L<https://metacpan.org/module/HTML::Scrubber/>. - -=head1 AUTHORS - -=over 4 - -=item * - -Ruslan Zakirov <Ruslan.Zakirov@gmail.com> - -=item * - -Nigel Metheringham <nigelm@cpan.org> - -=item * - -D. H. <podmaster@cpan.org> - -=back - -=head1 COPYRIGHT AND LICENSE - -This software is copyright (c) 2015 by Ruslan Zakirov, Nigel Metheringham, 2003-2004 D. H.. - -This is free software; you can redistribute it and/or modify it under -the same terms as the Perl 5 programming language system itself. - -=cut |