[interchange] * Add [adjust-href] tag that transforms regular HTML links into

Mike Heins interchange-cvs at icdevgroup.org
Mon Jun 19 12:34:54 UTC 2017


commit c327ac886db17c8f583f18bff87cb2b6526d9fb9
Author: Mike Heins <mikeh at endpoint.com>
Date:   Mon Jun 19 08:27:50 2017 -0400

    * Add [adjust-href] tag that transforms regular HTML links into
      Interchange URLs.
    
    * Add Vend::Server hook to apply tag to all HTML.  Uses HTML::Parser
      for high efficiency.

 MANIFEST                     |    8 ++-
 WHATSNEW-5.11                |    7 ++
 code/UserTag/adjust_href.tag |  145 ++++++++++++++++++++++++++++++++++++++++++
 lib/Vend/Server.pm           |   42 ++++++++++--
 4 files changed, 194 insertions(+), 8 deletions(-)
---
diff --git a/MANIFEST b/MANIFEST
index 0de26b9..2ae8122 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,8 +1,8 @@
-cpanfile
 code/Filter/acl2hash.filter
 code/Filter/alpha.filter
 code/Filter/alphanumeric.filter
 code/Filter/backslash.filter
+code/Filter/bcrypt.filter
 code/Filter/bold.filter
 code/Filter/cgi.filter
 code/Filter/checkbox.filter
@@ -281,6 +281,7 @@ code/UI_Tag/widget_meta.coretag
 code/UI_Tag/write_relative_file.coretag
 code/UI_Tag/write_shipping.coretag
 code/UI_Tag/xfer_catalog.coretag
+code/UserTag/adjust_href.tag
 code/UserTag/bar_button.tag
 code/UserTag/button.tag
 code/UserTag/capture_page.tag
@@ -346,6 +347,7 @@ code/Widget/value.widget
 code/Widget/yesno.widget
 code/Widget/ynzero.widget
 configure
+cpanfile
 debian/400mod_interchange.info
 debian/catalog_before.cfg
 debian/catalogs.cfg
@@ -1285,6 +1287,9 @@ SPECS/interchange-init
 SPECS/interchange-logrotate
 SPECS/interchange-wrapper
 SPECS/interchange.spec
+t/credit_cards.t
+t/interchange-test/interchange.cfg
+t/interchange-test/variable.txt
 test.pl
 UPGRADE
 WHATSNEW-4.5
@@ -1292,6 +1297,7 @@ WHATSNEW-4.7
 WHATSNEW-4.9
 WHATSNEW-5.1
 WHATSNEW-5.10
+WHATSNEW-5.11
 WHATSNEW-5.3
 WHATSNEW-5.5
 WHATSNEW-5.7
diff --git a/WHATSNEW-5.11 b/WHATSNEW-5.11
index ade81fe..dcf5df7 100644
--- a/WHATSNEW-5.11
+++ b/WHATSNEW-5.11
@@ -23,6 +23,13 @@ Core
 
 * Add bcrypt filter.
 
+* Add [adjust-href] tag. Reads HTML passed to it, finds <a href> links
+  and adjusts ones that don't begin with an absolute path to
+  Interchange URLs. Normally done by setting "Pragma adjust_href" in
+  catalog.cfg (or [pragma adjust_href] at the top of the page).
+
+* Add code to Server.pm to adjust <a href="link"> tags when "Pragma adjust_href"
+  is in force. Uses above tag.
 
 Payments
 --------
diff --git a/code/UserTag/adjust_href.tag b/code/UserTag/adjust_href.tag
new file mode 100644
index 0000000..b6dd752
--- /dev/null
+++ b/code/UserTag/adjust_href.tag
@@ -0,0 +1,145 @@
+UserTag adjust_href hasEndTag
+UserTag adjust_href Routine <<EOR
+sub {
+	my $text = shift;
+	use HTML::Parser;
+	use vars qw/ $Tag /;
+
+	my @out;
+
+	my $starth = sub {
+		my $tag = shift;
+		if(lc($tag) ne 'a') {
+			push @out, shift;
+			return;
+		}
+		my $text = shift;
+		my $attr = shift;
+		my $href = $attr->{href};
+		if($::Pragma->{allow_for_users} and $href =~ s{^$Vend::Cfg->{VendURL}/}{}) {
+			## Do nothing, removed user-clipped link intro
+			$attr->{href} = $href;
+		}
+		if($href =~ m{^\w+:} or $href =~ /^[^\w]/) {
+			push @out, $text;
+			return;
+		}
+
+		my $needform;
+		if($::Pragma->{allow_for_users} and $attr->{href} =~ s/\?(.*)//) {
+			my @parms = split /\&/, $1;
+			my @ignore = qw/ mv_pc mv_session_id mv_source= id=/;
+			my $ignore = join "|", @ignore;
+			$ignore = qr/($ignore)/;
+			for(@parms) {
+				next if $_ =~ $ignore;
+				$needform++;
+				$attr->{form} .= "\n";
+				$attr->{form} .= $_;
+			}
+		}
+
+		my %handled = qw/
+				add_dot_html    1
+				add_source      1
+				anchor          1
+				auto_format     1
+				form            1
+				href            1
+				link_relative   1
+				match_security  1
+				no_count        1
+				no_session      1
+				no_session_id   1
+				path_only       1
+				link_relative   1
+				secure          1
+		/;
+		
+		my $attrseq = shift;
+		push @out, "<a ";
+		my $seq = '';
+		my $opt = {};
+		my %seen;
+		$needform and @$attrseq = grep !$seen{$_}, @$attrseq, 'form';
+		for(@$attrseq) {
+			if($handled{$_}) {
+				$opt->{$_} = $attr->{$_};
+			}
+			else {
+				$seq .= qq{ $_="};
+				$seq .= $attr->{$_};
+				$seq .= '"';
+			}
+		}
+		push @out, qq{ href="};
+		push @out, $Tag->area($opt);
+		push @out, '"';
+		push @out, $seq;
+		push @out, ">";
+	};
+
+	my $p = HTML::Parser->new();
+	$p->handler( start => $starth, "tagname, text, attr, attrseq");
+	$p->handler( end => sub { push @out, shift }, "text");
+	$p->handler( text => sub { push @out, shift }, "text");
+	$p->handler( comment => sub { push @out, shift }, "text");
+	$p->handler( process => sub { push @out, shift }, "text");
+	$p->handler( declaration => sub { push @out, shift }, "text");
+	$p->parse($text);
+
+	return join "", @out;
+
+}
+EOR
+UserTag adjust_href Documentation <<EOD
+=head1 NAME
+
+ITL tag [adjust-href/ -- Turn standard <a href="page?parm=val"> into Interchange link
+
+=head1 SYNOPSIS
+
+  [adjust-href]
+  <a href="somepage.html?parameter=value">
+	link anchor
+  </a>
+  [/adjust-href]
+
+ becomes
+
+  <a href="https://srv.dmn.com/cgi/link/somepage.html?parameter=value&id=x338Dbll">
+  	link anchor
+  </a>
+
+=head1 DESCRIPTION
+
+Reads HTML passed to it, finds <a href> links and adjusts ones that
+don't begin with an absolute path to Interchange URLs. Normally done
+by setting
+
+  Pragma adjust_href
+
+in catalog.cfg (or [pragma adjust_href] at the top of the page). When
+this is done, transformation is done for every HTML page without the
+tag being present.
+
+This allows an HTML editor to edit pages/links and result in valid 
+Interchange URLs.
+
+=head2 Options
+
+Can set Pragma allow_for_users to allow users to send/resend existing links
+and adjust them. Otherwise, previously adjusted URLs that were downloaded will
+not be adjusted.
+
+=head1 BUGS
+
+Does not allow for relative paths using ../ -- it probably should. Will look at
+enhancing tag to do so.
+
+=head1 AUTHOR
+
+Mike Heins
+
+=cut
+EOD
diff --git a/lib/Vend/Server.pm b/lib/Vend/Server.pm
index 0d9c852..2ca4e6a 100644
--- a/lib/Vend/Server.pm
+++ b/lib/Vend/Server.pm
@@ -701,17 +701,21 @@ sub respond {
 		binmode(MESSAGE, ':utf8');
 	}
 
+	## Set this to determine if we want to do HTML-specific transformations. Also
+	## may be set below.
+	$Vend::IsHTML = $Vend::StatusLine =~ m{Content-Type:.*text/html}i;
+
 	if(! $s and $Vend::StatusLine) {
 		if ($Vend::StatusLine !~ /^Content-Type:/im) {
-		$Vend::StatusLine .= "\r\nContent-Type: text/html";
-		if ($response_charset) {
-			$Vend::StatusLine .= "; charset=$response_charset\r\n";
-		}
+			$Vend::StatusLine .= "\r\nContent-Type: text/html";
+			if ($response_charset) {
+				$Vend::StatusLine .= "; charset=$response_charset\r\n";
+			}
 
-		else {
-			$Vend::StatusLine .= "\r\n";
+			else {
+				$Vend::StatusLine .= "\r\n";
+			}
 		}
-	}
 
 # TRACK
 		$Vend::StatusLine .= "X-Track: " . $Vend::Track->header() . "\r\n"
@@ -720,6 +724,12 @@ sub respond {
 
 		add_cache_headers();
 
+		### Adjust links if appropriate
+		if($Vend::IsHTML and $::Pragma->{adjust_href}) {
+			my $text = Vend::Tags->adjust_href($$body);
+			$body = \$text;
+		}
+
 		print MESSAGE canon_status($Vend::StatusLine);
 		print MESSAGE "\r\n";
 		print MESSAGE $$body;
@@ -754,6 +764,13 @@ sub respond {
 	}
 
 	if($Vend::ResponseMade || $CGI::values{mv_no_header} ) {
+
+		### Adjust links if appropriate
+		if($Vend::IsHTML and $::Pragma->{adjust_href}) {
+			my $text = Vend::Tags->adjust_href($$body);
+			$body = \$text;
+		}
+
 		print $fh $$body;
 		print $rfh $$body if $rfh;
 #show_times("end response send") if $Global::ShowTimes;
@@ -863,6 +880,10 @@ sub respond {
 		else {
 			print $fh canon_status("Content-Type: text/html");
 		}
+
+		## This is HTML for sure now. Set this to force HTML-specific transformations if pragma(s) set.
+		$Vend::IsHTML = 1;
+
 # TRACK
 		print $fh canon_status("X-Track: " . $Vend::Track->header())
 			if $Vend::Track and $Vend::Cfg->{UserTrack};
@@ -872,6 +893,13 @@ sub respond {
 	print $fh canon_status($_) for get_cache_headers();
 
 	print $fh "\r\n";
+
+	### Adjust links if appropriate
+	if($Vend::IsHTML and $::Pragma->{adjust_href}) {
+		my $text = Vend::Tags->adjust_href($$body);
+		$body = \$text;
+	}
+
 	print $fh $$body;
 	print $rfh $$body if $rfh;
 #show_times("end response send") if $Global::ShowTimes;



More information about the interchange-cvs mailing list