[interchange-cvs] interchange - kwalsh modified 6 files

interchange-cvs at icdevgroup.org interchange-cvs at icdevgroup.org
Tue Mar 25 05:17:18 EST 2008


User:      kwalsh
Date:      2008-03-25 10:17:18 GMT
Modified:  lib/Vend File.pm Server.pm Util.pm
Modified:  lib/Vend/Table Common.pm DBI.pm
Added:     lib/Vend CharSet.pm
Log:
    * Committing Sonny Cook's UTF-8 patches, along with a fix for the
      PreFork issue caused by the patches.  Thanks, Sonny!

    * From Sonny's original article on interchange-core:

	There are two variables that will need to be added to your
	catalog.cfg:  MV_HTTP_CHARSET and MV_UTF8.  They should be set
	like so:

	    Variable MV_HTTP_CHARSET UTF-8
	    Variable MV_UTF8         1

	The MV_UTF8 variable tells the system that we are using UTF-8
	for stuff internally when that needs to be specified.  Perl mostly
	does the right thing wrt UTF-8, but when we need to explicitly
	specify for one of a handful of reasons, this variable lets us
	configure that.

	The MV_HTTP_CHARSET specifies which character set that the web
	pages are going to be encoded with.  UTF-8 is the only value that
	has been tested at the moment, although it probably generalises
	to whatever you would like to use.

	Communication with the database introduces three database
	directives.  These are required to ensure that data is properly
	communicated with the database:

	    PG_ENABLE_UTF8
	    MYSQL_ENABLE_UTF8
	    GDBM_ENABLE_UTF8

	These can be set on a table by table basis or with DatabaseDefault.
	You will probably want to set the one for the sql database you are
	using and one for GDBM, like so:

	    DatabaseDefault PG_ENABLE_UTF8 1
	    DatabaseDefault GDBM_ENABLE_UTF8 1

	You will need to make sure that your database is encoded in UTF-8
	and that all of your data is encoded that way as well.

	Enabling UTF-8 should not cause any problems if your data is all in
	US-ASCII, but might cause problems if other encodings are involved.

    * Note: This commit is missing the latest safeuntrap/reval/safetrap
      code, which should be added ASAP.  In the meantime, the following
      works in the interchange.cfg file (with Perl 5.8.8):

	SafeUntrap  rand require caller dofile print

Revision  Changes    Path
2.27                 interchange/lib/Vend/File.pm


rev 2.27, prev_rev 2.26
Index: File.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/File.pm,v
retrieving revision 2.26
retrieving revision 2.27
diff -u -r2.26 -r2.27
--- File.pm	24 Jan 2008 22:11:13 -0000	2.26
+++ File.pm	25 Mar 2008 10:17:18 -0000	2.27
@@ -1,6 +1,6 @@
 # Vend::File - Interchange file functions
 #
-# $Id: File.pm,v 2.26 2008-01-24 22:11:13 kwalsh Exp $
+# $Id: File.pm,v 2.27 2008-03-25 10:17:18 kwalsh Exp $
 # 
 # Copyright (C) 2002-2007 Interchange Development Group
 # Copyright (C) 1996-2002 Red Hat, Inc.
@@ -50,16 +50,25 @@
 use Config;
 use Fcntl;
 use Errno;
+use Encode qw( is_utf8 );
 use Vend::Util;
 use File::Path;
 use File::Copy;
 use subs qw(logError logGlobal);
 use vars qw($VERSION @EXPORT @EXPORT_OK $errstr);
-$VERSION = substr(q$Revision: 2.26 $, 10);
+$VERSION = substr(q$Revision: 2.27 $, 10);
 
 sub writefile {
     my($file, $data, $opt) = @_;
 
+	my $is_utf8;
+	if ($::Variable->{MV_UTF8} && ref $data) {
+		$is_utf8 = is_utf8($$data);
+	}
+	else {
+		$is_utf8 = is_utf8($data);
+	}
+
 	$file = ">>$file" unless $file =~ /^[|>]/;
 	if (ref $opt and $opt->{umask}) {
 		$opt->{umask} = umask oct($opt->{umask});
@@ -82,6 +91,7 @@
 			}
 			# We have checked for beginning > or | previously
 			open(MVLOGDATA, $file) or die "open\n";
+			binmode(MVLOGDATA, ":utf8") if $is_utf8;
 			lockfile(\*MVLOGDATA, 1, 1) or die "lock\n";
 			seek(MVLOGDATA, 0, 2) or die "seek\n";
 			if(ref $data) {
@@ -95,6 +105,7 @@
 		else {
             my (@args) = grep /\S/, Text::ParseWords::shellwords($file);
 			open(MVLOGDATA, "|-") || exec @args;
+			binmode(MVLOGDATA, ":utf8") if $is_utf8;
 			if(ref $data) {
 				print(MVLOGDATA $$data) or die "pipe to\n";
 			}
@@ -201,6 +212,7 @@
 		$Global::Variable->{MV_FILE} = $file;
 
 		binmode(READIN) if $Global::Windows;
+		binmode(READIN, ":utf8") if $::Variable->{MV_UTF8};
 		undef $/;
 		$contents = <READIN>;
 		close(READIN);



2.88                 interchange/lib/Vend/Server.pm


rev 2.88, prev_rev 2.87
Index: Server.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Server.pm,v
retrieving revision 2.87
retrieving revision 2.88
diff -u -r2.87 -r2.88
--- Server.pm	5 Feb 2008 16:44:51 -0000	2.87
+++ Server.pm	25 Mar 2008 10:17:18 -0000	2.88
@@ -1,6 +1,6 @@
 # Vend::Server - Listen for Interchange CGI requests as a background server
 #
-# $Id: Server.pm,v 2.87 2008-02-05 16:44:51 kwalsh Exp $
+# $Id: Server.pm,v 2.88 2008-03-25 10:17:18 kwalsh Exp $
 #
 # Copyright (C) 2002-2008 Interchange Development Group
 # Copyright (C) 1996-2002 Red Hat, Inc.
@@ -26,11 +26,12 @@
 package Vend::Server;
 
 use vars qw($VERSION);
-$VERSION = substr(q$Revision: 2.87 $, 10);
+$VERSION = substr(q$Revision: 2.88 $, 10);
 
 use Cwd;
 use POSIX qw(setsid strftime);
 use Vend::Util;
+use Vend::CharSet;
 use Fcntl;
 use Errno qw/:POSIX/;
 use Config;
@@ -247,9 +248,28 @@
 #::logDebug("entity=" . ${$h->{entity}});
 
 	if ("\U$CGI::request_method" eq 'POST') {
-		parse_post(\$CGI::query_string)
-			if $Global::TolerateGet;
-		parse_post($h->{entity});
+#::logDebug("content type header: " . $CGI::content_type);
+		## check for valid content type
+		if (   $CGI::content_type =~ m|^multipart/form-data|
+			|| $CGI::content_type =~ m|^application/x-www-form-urlencoded|) {
+			parse_post(\$CGI::query_string)
+				if $Global::TolerateGet;
+			parse_post($h->{entity});
+		}
+		else {
+			## invalid content type for POST
+			## XXX we may want to be a little more forgiving here
+			my $msg = ::get_locale_message(415, "Unsupported Content-Type for POST Method");
+			my $content_type = $msg =~ /<html/i ? 'text/html' : 'text/plain';
+			my $len = length($msg);
+			$Vend::StatusLine = <<EOF;
+Status: 415 Unsupported Media Type
+Content-Type: $content_type
+Content-Length: $len
+EOF
+			respond('', \$msg);
+			die($msg);
+		}
 	}
 	elsif ("\U$CGI::request_method" eq 'PUT') {
 #::logDebug("Put operation.");
@@ -308,8 +328,20 @@
 
 sub parse_post {
 	my $sref = shift;
-	my(@pairs, $pair, $key, $value);
 	return unless length $$sref;
+
+	my(@pairs, $pair, $key, $value);
+	my $charset;
+
+	if ($CGI::content_type =~ m/charset=(["']?)([-a-zA-Z0-9]+)\1/) {
+		$charset = $2; 
+	}
+	else {
+		$charset = Vend::CharSet->default_charset();
+	}
+
+	$CGI::values{mv_form_charset} = $charset;
+
 	if ($CGI::content_type =~ /^multipart/i) {
 		return parse_multipart($sref) if $CGI::useragent !~ /MSIE\s+5/i;
 		# try and work around an apparent IE5 bug that sends the content type
@@ -352,19 +384,19 @@
 
 #::logDebug("incoming --> $key");
 		$key = $::IV->{$key} if defined $::IV->{$key};
-		$key =~ tr/+/ /;
-		$key =~ s/%([0-9a-fA-F][0-9a-fA-F])/chr(hex $1)/ge;
+		$key = Vend::CharSet->decode_urlencode($key, $charset);
 #::logDebug("mapping  --> $key");
-		$value =~ tr/+/ /;
-		$value =~ s/%([0-9a-fA-F][0-9a-fA-F])/chr(hex $1)/ge;
-		# Handle multiple keys
-		if(defined $CGI::values{$key} and ! defined $::SV{$key}) {
-			$CGI::values{$key} = "$CGI::values{$key}\0$value";
-			push ( @{$CGI::values_array{$key}}, $value)
-		}
-		else {
-			$CGI::values{$key} = $value;
-			$CGI::values_array{$key} = [$value];
+		if ($key) {
+			$value = Vend::CharSet->decode_urlencode($value, $charset);
+			# Handle multiple keys
+			if(defined $CGI::values{$key} and ! defined $::SV{$key}) {
+				$CGI::values{$key} = "$CGI::values{$key}\0$value";
+				push ( @{$CGI::values_array{$key}}, $value);
+			}
+			else {
+				$CGI::values{$key} = $value;
+				$CGI::values_array{$key} = [$value];
+			}
 		}
 	}
 	if (! $redo and "\U$CGI::request_method" eq 'POST') {
@@ -427,6 +459,16 @@
 				next;
 			}
 
+			my ($content_type) = $header{'Content-Type'} =~ /^([^\s;]+)/;
+			my ($charset) = $header{'Content-Type'} =~ / charset="?([-a-zA-Z0-9]+)"?/;
+
+			$content_type ||= 'text/plain';
+			$charset ||= Vend::CharSet->default_charset();
+
+			if ($content_type =~ /text/) {
+				$data = Vend::CharSet->to_internal($charset, $data);
+			}
+
 			if($filename) {
 				$CGI::file{$param} = $data;
 				$data = $filename;
@@ -440,6 +482,7 @@
 	return 1;
 }
 
+
 sub create_cookie {
 	my($domain,$path) = @_;
 	my  $out;
@@ -496,6 +539,8 @@
 	# $body is now a reference
     my ($s, $body) = @_;
 #show_times("begin response send") if $Global::ShowTimes;
+	my $response_charset = Vend::CharSet->default_charset();
+
 	my $status;
 	return if $Vend::Sent;
 	if($Vend::StatusLine) {
@@ -516,7 +561,8 @@
 
 	if(! $s and $Vend::StatusLine) {
 		$Vend::StatusLine .= ($Vend::StatusLine =~ /^Content-Type:/im)
-							? '' : "\r\nContent-Type: text/html\r\n";
+							? '' : "\r\nContent-Type: text/html; charset=$response_charset\r\n";
+
 # TRACK
         $Vend::StatusLine .= "X-Track: " . $Vend::Track->header() . "\r\n"
 			if $Vend::Track;
@@ -630,7 +676,7 @@
 		print $fh canon_status($Vend::StatusLine);
 	}
 	elsif(! $Vend::ResponseMade) {        
-		print $fh canon_status("Content-Type: text/html");
+		print $fh canon_status("Content-Type: text/html; charset=$response_charset");
 # TRACK        
         print $fh canon_status("X-Track: " . $Vend::Track->header())
 			if $Vend::Track;
@@ -895,6 +941,8 @@
     	or return 0;
     show_times('end cgi read') if $Global::ShowTimes;
 
+	binmode(MESSAGE, ":utf8") if $::Variable->{MV_UTF8};
+
     my $http = new Vend::Server \*MESSAGE, \%env, \$entity;
 
     # Can log all CGI inputs



2.116                interchange/lib/Vend/Util.pm


rev 2.116, prev_rev 2.115
Index: Util.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Util.pm,v
retrieving revision 2.115
retrieving revision 2.116
diff -u -r2.115 -r2.116
--- Util.pm	25 Feb 2008 17:17:35 -0000	2.115
+++ Util.pm	25 Mar 2008 10:17:18 -0000	2.116
@@ -1,6 +1,6 @@
 # Vend::Util - Interchange utility functions
 #
-# $Id: Util.pm,v 2.115 2008-02-25 17:17:35 mheins Exp $
+# $Id: Util.pm,v 2.116 2008-03-25 10:17:18 kwalsh Exp $
 # 
 # Copyright (C) 2002-2007 Interchange Development Group
 # Copyright (C) 1996-2002 Red Hat, Inc.
@@ -91,7 +91,7 @@
 use Vend::File;
 use subs qw(logError logGlobal);
 use vars qw($VERSION @EXPORT @EXPORT_OK);
-$VERSION = substr(q$Revision: 2.115 $, 10);
+$VERSION = substr(q$Revision: 2.116 $, 10);
 
 my $Eval_routine;
 my $Eval_routine_file;
@@ -1182,6 +1182,7 @@
 
 		if (open(MVIN, "< $fn")) {
 			binmode(MVIN) if $Global::Windows;
+			binmode(MVIN, ":utf8") if $::Variable->{MV_UTF8};
 			undef $/;
 			$contents = <MVIN>;
 			close(MVIN);



2.1                  interchange/lib/Vend/CharSet.pm


rev 2.1, prev_rev 2.0



2.47                 interchange/lib/Vend/Table/Common.pm


rev 2.47, prev_rev 2.46
Index: Common.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Table/Common.pm,v
retrieving revision 2.46
retrieving revision 2.47
diff -u -r2.46 -r2.47
--- Common.pm	12 Mar 2008 20:13:31 -0000	2.46
+++ Common.pm	25 Mar 2008 10:17:18 -0000	2.47
@@ -1,6 +1,6 @@
 # Vend::Table::Common - Common access methods for Interchange databases
 #
-# $Id: Common.pm,v 2.46 2008-03-12 20:13:31 jon Exp $
+# $Id: Common.pm,v 2.47 2008-03-25 10:17:18 kwalsh Exp $
 #
 # Copyright (C) 2002-2007 Interchange Development Group
 # Copyright (C) 1996-2002 Red Hat, Inc.
@@ -23,7 +23,7 @@
 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
 # MA  02110-1301  USA.
 
-$VERSION = substr(q$Revision: 2.46 $, 10);
+$VERSION = substr(q$Revision: 2.47 $, 10);
 use strict;
 
 package Vend::Table::Common;
@@ -486,7 +486,9 @@
 	$s->filter(\@fields, $s->[$COLUMN_INDEX], $s->[$CONFIG]{FILTER_TO})
 		if $s->[$CONFIG]{FILTER_TO};
 	$s->lock_table();
+
     $s->[$TIE_HASH]{"k$key"} = join("\t", map(stuff($_), @fields));
+
 	$s->unlock_table();
 	return $key;
 }
@@ -1040,6 +1042,8 @@
 			or die errmsg("%s %s: %s\n", errmsg("open"), $infile, $!);
 	}
 
+	new_filehandle(\*IN);
+
 	my $field_hash;
 	my $para_sep;
 	my $codere = '[\w-_#/.]+';
@@ -1211,6 +1215,9 @@
 				$fh = new IO::File "> $infile.$i[$i]";
 				die errmsg("%s %s: %s\n", errmsg("create"), "$infile.$i[$i]",
 				$!) unless defined $fh;
+
+				new_filehandle($fh);
+
 				eval {
 					unlink "$infile.$n[$i]" if -l "$infile.$n[$i]";
 					symlink "$infile.$i[$i]", "$infile.$n[$i]";
@@ -1384,11 +1391,12 @@
 					errmsg("%s %s: %s\n", errmsg("open read/write"), $realfile, $!);
 			lockfile(\*IN, 1, 1)
 				or die errmsg("%s %s: %s\n", errmsg("lock"), $realfile, $!);
+			new_filehandle(\*IN);
 			<IN>;
 			eval $format{$format};
 			die errmsg("%s %s: %s\n", errmsg("import"), $options->{name}, $!) if $@;
 		}
-		elsif (! open(IN, ">$realfile") ) {
+		elsif (! open(IN, ">$realfile") && new_filehandle(\*IN) ) {
 				die errmsg("%s %s: %s\n", errmsg("create"), $realfile, $!);
 		} 
 		else {
@@ -1416,11 +1424,13 @@
 			}
 			else {
 				$fh = new IO::File "$infile.$i[$i]";
+				new_filehandle($fh);
 				my (@lines) = <$fh>;
 				close $fh or die "close: $!";
 				my $option = $o[$i] || 'none';
 				@lines = sort { &{$Sort{$option}} } @lines;
 				$fh = new IO::File ">$infile.$i[$i]";
+				new_filehandle($fh);
 				print $fh @lines;
 				close $fh or die "close: $!";
 			}
@@ -1598,6 +1608,12 @@
 	return $cfg->{last_error} = $msg;
 }
 
+sub new_filehandle {
+	my $fh = shift;
+	binmode($fh, ":utf8") if $::Variable->{MV_UTF8};
+	return $fh;
+}
+
 1;
 
 __END__



2.83                 interchange/lib/Vend/Table/DBI.pm


rev 2.83, prev_rev 2.82
Index: DBI.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Table/DBI.pm,v
retrieving revision 2.82
retrieving revision 2.83
diff -u -r2.82 -r2.83
--- DBI.pm	12 Mar 2008 20:13:31 -0000	2.82
+++ DBI.pm	25 Mar 2008 10:17:18 -0000	2.83
@@ -1,6 +1,6 @@
 # Vend::Table::DBI - Access a table stored in an DBI/DBD database
 #
-# $Id: DBI.pm,v 2.82 2008-03-12 20:13:31 jon Exp $
+# $Id: DBI.pm,v 2.83 2008-03-25 10:17:18 kwalsh Exp $
 #
 # Copyright (C) 2002-2007 Interchange Development Group
 # Copyright (C) 1996-2002 Red Hat, Inc.
@@ -21,7 +21,7 @@
 # MA  02110-1301  USA.
 
 package Vend::Table::DBI;
-$VERSION = substr(q$Revision: 2.82 $, 10);
+$VERSION = substr(q$Revision: 2.83 $, 10);
 
 use strict;
 no warnings qw(uninitialized numeric);
@@ -69,6 +69,8 @@
 					AUTOCOMMIT     	AutoCommit
 					LONGTRUNCOK    	LongTruncOk
 					LONGREADLEN    	LongReadLen
+                    PG_ENABLE_UTF8  pg_enable_utf8
+				    MYSQL_ENABLE_UTF8 mysql_enable_utf8
 				) );
 my @Dattr = keys %Dattr;
 






More information about the interchange-cvs mailing list