From a287e741842f67d0a04c48276221d85f16079d55 Mon Sep 17 00:00:00 2001 From: Opal <847966@proton.me> Date: Sat, 15 Oct 2022 15:27:41 -0700 Subject: merging code between old emacsconf repo, to sachac's emacsconf repo --- roles/wiki/templates/Scrubber.pm | 749 +++++++++++++++++++++++++++++++++++ roles/wiki/templates/copyright.pm | 60 +++ roles/wiki/templates/emacsconf.setup | 440 ++++++++++++++++++++ roles/wiki/templates/htmlscrubber.pm | 132 ++++++ roles/wiki/templates/license.pm | 59 +++ 5 files changed, 1440 insertions(+) create mode 100644 roles/wiki/templates/Scrubber.pm create mode 100644 roles/wiki/templates/copyright.pm create mode 100644 roles/wiki/templates/emacsconf.setup create mode 100755 roles/wiki/templates/htmlscrubber.pm create mode 100644 roles/wiki/templates/license.pm (limited to 'roles/wiki/templates') diff --git a/roles/wiki/templates/Scrubber.pm b/roles/wiki/templates/Scrubber.pm new file mode 100644 index 0000000..2efaa10 --- /dev/null +++ b/roles/wiki/templates/Scrubber.pm @@ -0,0 +1,749 @@ +package HTML::Scrubber; + +# ABSTRACT: Perl extension for scrubbing/sanitizing html + + +use 5.008; # enforce minimum perl version of 5.8 +use strict; +use warnings; +use HTML::Parser 3.47 (); +use HTML::Entities; +use Scalar::Util ('weaken'); + +our ( @_scrub, @_scrub_fh ); + +our $VERSION = '0.15'; # VERSION +our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY + +# my my my my, these here to prevent foolishness like +# http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals +(@_scrub) = ( \&_scrub, "self, event, tagname, attr, attrseq, text" ); +(@_scrub_fh) = ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text" ); + +sub new { + my $package = shift; + my $p = HTML::Parser->new( + api_version => 3, + default_h => \@_scrub, + marked_sections => 0, + strict_comment => 0, + unbroken_text => 1, + case_sensitive => 0, + boolean_attribute_value => undef, + empty_element_tags => 1, + ); + + my $self = { + _p => $p, + _rules => { '*' => 0, }, + _comment => 0, + _process => 0, + _r => "", + _optimize => 1, + _script => 0, + _style => 0, + }; + + $p->{"\0_s"} = bless $self, $package; + weaken( $p->{"\0_s"} ); + + return $self unless @_; + + my (%args) = @_; + + for my $f (qw[ default allow deny rules process comment ]) { + next unless exists $args{$f}; + if ( ref $args{$f} ) { + $self->$f( @{ $args{$f} } ); + } + else { + $self->$f( $args{$f} ); + } + } + + return $self; +} + + +sub comment { + return $_[0]->{_comment} + if @_ == 1; + $_[0]->{_comment} = $_[1]; + return; +} + + +sub process { + return $_[0]->{_process} + if @_ == 1; + $_[0]->{_process} = $_[1]; + return; +} + + +sub script { + return $_[0]->{_script} + if @_ == 1; + $_[0]->{_script} = $_[1]; + return; +} + + +sub style { + return $_[0]->{_style} + if @_ == 1; + $_[0]->{_style} = $_[1]; + return; +} + + +sub allow { + my $self = shift; + for my $k (@_) { + $self->{_rules}{ lc $k } = 1; + } + $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + + +sub deny { + my $self = shift; + + for my $k (@_) { + $self->{_rules}{ lc $k } = 0; + } + + $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + + +sub rules { + my $self = shift; + my (%rules) = @_; + for my $k ( keys %rules ) { + $self->{_rules}{ lc $k } = $rules{$k}; + } + + $self->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + + +sub default { + return $_[0]->{_rules}{'*'} + if @_ == 1; + + $_[0]->{_rules}{'*'} = $_[1] if defined $_[1]; + $_[0]->{_rules}{'_'} = $_[2] if defined $_[2] and ref $_[2]; + $_[0]->{_optimize} = 1; # each time a rule changes, reoptimize when parse + + return; +} + + +sub scrub_file { + if ( @_ > 2 ) { + return unless defined $_[0]->_out( $_[2] ); + } + else { + $_[0]->{_p}->handler( default => @_scrub ); + } + + $_[0]->_optimize(); #if $_[0]->{_optimize}; + + $_[0]->{_p}->parse_file( $_[1] ); + + return delete $_[0]->{_r} unless exists $_[0]->{_out}; + print { $_[0]->{_out} } $_[0]->{_r} if length $_[0]->{_r}; + delete $_[0]->{_out}; + return 1; +} + + +sub scrub { + if ( @_ > 2 ) { + return unless defined $_[0]->_out( $_[2] ); + } + else { + $_[0]->{_p}->handler( default => @_scrub ); + } + + $_[0]->_optimize(); # if $_[0]->{_optimize}; + + $_[0]->{_p}->parse( $_[1] ) if defined( $_[1] ); + $_[0]->{_p}->eof(); + + return delete $_[0]->{_r} unless exists $_[0]->{_out}; + delete $_[0]->{_out}; + return 1; +} + + +sub _out { + my ( $self, $o ) = @_; + + unless ( ref $o and ref \$o ne 'GLOB' ) { + open my $F, '>', $o or return; + binmode $F; + $self->{_out} = $F; + } + else { + $self->{_out} = $o; + } + + $self->{_p}->handler( default => @_scrub_fh ); + + return 1; +} + + +sub _validate { + my ( $s, $t, $r, $a, $as ) = @_; + return "<$t>" unless %$a; + + $r = $s->{_rules}->{$r}; + my %f; + + for my $k ( keys %$a ) { + my $check = exists $r->{$k} ? $r->{$k} : exists $r->{'*'} ? $r->{'*'} : next; + + if ( ref $check eq 'CODE' ) { + my @v = $check->( $s, $t, $k, $a->{$k}, $a, \%f ); + next unless @v; + $f{$k} = shift @v; + } + elsif ( ref $check || length($check) > 1 ) { + $f{$k} = $a->{$k} if $a->{$k} =~ m{$check}; + } + elsif ($check) { + $f{$k} = $a->{$k}; + } + } + + if (%f) { + my %seen; + return "<$t $r>" + if $r = join ' ', map { + defined $f{$_} + ? qq[$_="] . encode_entities( $f{$_} ) . q["] + : $_; # boolean attribute (TODO?) + } grep { exists $f{$_} and !$seen{$_}++; } @$as; + } + + return "<$t>"; +} + + +sub _scrub_str { + my ( $p, $e, $t, $a, $as, $text ) = @_; + + my $s = $p->{"\0_s"}; + my $outstr = ''; + + if ( $e eq 'start' ) { + if ( exists $s->{_rules}->{$t} ) # is there a specific rule + { + if ( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;) + { + $outstr .= $s->_validate( $t, $t, $a, $as ); + } + elsif ( $s->{_rules}->{$t} ) # validate using default attribute rule + { + $outstr .= $s->_validate( $t, '_', $a, $as ); + } + } + elsif ( $s->{_rules}->{'*'} ) # default allow tags + { + $outstr .= $s->_validate( $t, '_', $a, $as ); + } + } + elsif ( $e eq 'end' ) { + my $place = 0; + if ( exists $s->{_rules}->{$t} ) { + $place = 1 if $s->{_rules}->{$t}; + } + elsif ( $s->{_rules}->{'*'} ) { + $place = 1; + } + if ($place) { + if ( length $text ) { + $outstr .= ""; + } + else { + substr $s->{_r}, -1, 0, ' /'; + } + } + } + elsif ( $e eq 'comment' ) { + if ( $s->{_comment} ) { + + # only copy comments through if they are well formed... + $outstr .= $text if ( $text =~ m|^$|ms ); + } + } + elsif ( $e eq 'process' ) { + $outstr .= $text if $s->{_process}; + } + elsif ( $e eq 'text' or $e eq 'default' ) { + $text =~ s//>/g; + + $outstr .= $text; + } + elsif ( $e eq 'start_document' ) { + $outstr = ""; + } + + return $outstr; +} + + +sub _scrub_fh { + my $self = $_[0]->{"\0_s"}; + print { $self->{_out} } $self->{'_r'} if length $self->{_r}; + $self->{'_r'} = _scrub_str(@_); +} + + +sub _scrub { + + $_[0]->{"\0_s"}->{_r} .= _scrub_str(@_); +} + +sub _optimize { + my ($self) = @_; + + my (@ignore_elements) = grep { not $self->{"_$_"} } qw(script style); + $self->{_p}->ignore_elements(@ignore_elements); # if @ is empty, we reset ;) + + return unless $self->{_optimize}; + + #sub allow + # return unless $self->{_optimize}; # till I figure it out (huh) + + if ( $self->{_rules}{'*'} ) { # default allow + $self->{_p}->report_tags(); # so clear it + } + else { + + my (@reports) = + grep { # report only tags we want + $self->{_rules}{$_} + } keys %{ $self->{_rules} }; + + $self->{_p}->report_tags( # default deny, so optimize + @reports + ) if @reports; + } + + # sub deny + # return unless $self->{_optimize}; # till I figure it out (huh) + my (@ignores) = + grep { not $self->{_rules}{$_} } grep { $_ ne '*' } keys %{ $self->{_rules} }; + + $self->{_p}->ignore_tags( # always ignore stuff we don't want + @ignores + ) if @ignores; + + $self->{_optimize} = 0; + return; +} + +1; + +#print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl! +#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl +#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl +#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl + +__END__ + +=pod + +=for stopwords html cpan callback homepage Perlbrew perltidy respository + +=head1 NAME + +HTML::Scrubber - Perl extension for scrubbing/sanitizing html + +=head1 VERSION + +version 0.15 + +=head1 SYNOPSIS + + use HTML::Scrubber; + + my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] ); + print $scrubber->scrub('

bold missing

'); + # output is:

bold

+ + # more complex input + my $html = q[ + + +
+ a => link + br =>
+ b => bold + u => UNDERLINE + ]; + + print $scrubber->scrub($html); + + $scrubber->deny( qw[ p b i u hr br ] ); + + print $scrubber->scrub($html); + +=head1 DESCRIPTION + +If you want to "scrub" or "sanitize" html input in a reliable and flexible +fashion, then this module is for you. + +I wasn't satisfied with HTML::Sanitizer because it is based on +HTML::TreeBuilder, so I thought I'd write something similar that works directly +with HTML::Parser. + +=head1 METHODS + +First a note on documentation: just study the L below. It's +all the documentation you could need + +Also, be sure to read all the comments as well as L. + +If you're new to perl, good luck to you. + +=head2 comment + + warn "comments are ", $p->comment ? 'allowed' : 'not allowed'; + $p->comment(0); # off by default + +=head2 process + + warn "process instructions are ", $p->process ? 'allowed' : 'not allowed'; + $p->process(0); # off by default + +=head2 script + + warn "script tags (and everything in between) are supressed" + if $p->script; # off by default + $p->script( 0 || 1 ); + +B<**> Please note that this is implemented using HTML::Parser's ignore_elements +function, so if C