[interchange] Updated UTF8-handling for GDBM files

David Christensen interchange-cvs at icdevgroup.org
Thu Sep 22 16:33:58 UTC 2016


commit 25f07cb57a7347398cd92ffaf42c25aa08f58c82
Author: David Christensen <david at endpoint.com>
Date:   Thu Sep 22 10:47:05 2016 -0500

    Updated UTF8-handling for GDBM files
    
    Remove the existing cruft related to GDBM_ENABLE_UTF8, which
    appears never to have worked entirely correctly.
    
    In the meantime, tie this behavior solely to whether UTF8 is enabled
    in the catalog, rather than configuring it separately.
    
    Also make sure the :utf8 layer is set on data files being imported and exported.
    
    Make sure we respect the MINIVEND_DISABLE_UTF8 environment variable.

 lib/Vend/Data.pm         |    5 ++++-
 lib/Vend/Table/Common.pm |   26 ++++++++++++++++++++++----
 lib/Vend/Table/GDBM.pm   |   29 +----------------------------
 3 files changed, 27 insertions(+), 33 deletions(-)
---
diff --git a/lib/Vend/Data.pm b/lib/Vend/Data.pm
index f8c97f9..5313f72 100644
--- a/lib/Vend/Data.pm
+++ b/lib/Vend/Data.pm
@@ -1248,7 +1248,10 @@ sub export_database {
 		or die "Couldn't exclusive lock $file: $!\n";
 	open(EXPORT, "+>$file") or
 	   	die "Couldn't write $file: $!\n";
-	
+
+	# we should be outputting as UTF8 if we're so configured
+	binmode(\*EXPORT, ':utf8') if $::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8};
+
 #::logDebug("EXPORT_SORT=" . $db->config('EXPORT_SORT'));
 	if($opt->{sort} ||= $db->config('EXPORT_SORT')) {
 #::logDebug("Found EXPORT_SORT=$opt->{sort}");
diff --git a/lib/Vend/Table/Common.pm b/lib/Vend/Table/Common.pm
index 510a9f5..d3a39ce 100644
--- a/lib/Vend/Table/Common.pm
+++ b/lib/Vend/Table/Common.pm
@@ -1,8 +1,6 @@
 # Vend::Table::Common - Common access methods for Interchange databases
 #
-# $Id: Common.pm,v 2.51 2008-05-26 02:30:04 markj Exp $
-#
-# Copyright (C) 2002-2008 Interchange Development Group
+# Copyright (C) 2002-2016 Interchange Development Group
 # Copyright (C) 1996-2002 Red Hat, Inc.
 #
 # This program was originally based on Vend 0.2 and 0.3
@@ -23,7 +21,7 @@
 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
 # MA  02110-1301  USA.
 
-$VERSION = substr(q$Revision: 2.51 $, 10);
+$VERSION = '2.52';
 use strict;
 
 package Vend::Table::Common;
@@ -34,6 +32,19 @@ no warnings qw(uninitialized numeric);
 use Symbol;
 use Vend::Util;
 
+our $Has_Encode = 0;
+
+if ($ENV{MINIVEND_DISABLE_UTF8}) {
+	# stub routines to pass-thru data if disabled
+	*encode_utf8 = sub {@_};
+	*decode_utf8 = sub {@_};
+}
+else {
+	require Encode;
+	import Encode qw( encode_utf8 decode_utf8 );
+	$Has_Encode = 1;
+}
+
 use Exporter;
 use vars qw($Storable $VERSION @EXPORT @EXPORT_OK);
 @EXPORT = qw(create_columns import_ascii_delimited import_csv config columns);
@@ -164,6 +175,8 @@ sub unlock_table {
 
 sub stuff {
     my ($val) = @_;
+    $val = encode_utf8($val)
+        if $Has_Encode && ($::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8});
     $val =~ s,([\t\%]),$Hex_string[ord($1)],eg;
     return $val;
 }
@@ -171,6 +184,8 @@ sub stuff {
 sub unstuff {
     my ($val) = @_;
     $val =~ s,%(..),chr(hex($1)),eg;
+    $val = decode_utf8($val)
+        if $Has_Encode && ($::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8});
     return $val;
 }
 
@@ -1080,6 +1095,9 @@ sub import_ascii_delimited {
 
 	new_filehandle(\*IN);
 
+	# we should be inputting as UTF8 if we're so configured
+	binmode(\*IN, ':utf8') if $::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8};
+
 	my $field_hash;
 	my $para_sep;
 	my $codere = '[\w-_#/.]+';
diff --git a/lib/Vend/Table/GDBM.pm b/lib/Vend/Table/GDBM.pm
index 55a4610..5b6a5a2 100644
--- a/lib/Vend/Table/GDBM.pm
+++ b/lib/Vend/Table/GDBM.pm
@@ -27,17 +27,8 @@ use vars qw($VERSION @ISA);
 use GDBM_File;
 use Vend::Table::Common;
 
-if ($ENV{MINIVEND_DISABLE_UTF8}) {
-	sub encode($$;$){}
-	sub decode($$;$){}
-}
-else {
-	require Encode;
-	import Encode qw( decode encode );
-}
-
 @ISA = qw(Vend::Table::Common);
-$VERSION = '2.21';
+$VERSION = '2.22';
 
 sub new {
 	my ($class, $obj) = @_;
@@ -120,8 +111,6 @@ sub open_table {
 	die ::errmsg("%s could not tie to '%s': %s", 'GDBM', $filename, $!)
 		unless $dbm;
 
-	apply_utf8_filters($dbm) if $config->{GDBM_ENABLE_UTF8};
-
 	my $columns = [split(/\t/, $tie->{'c'})];
 
 	$config->{VERBATIM_FIELDS} = 1 unless defined $config->{VERBATIM_FIELDS};
@@ -140,22 +129,6 @@ sub open_table {
 	bless $s, $class;
 }
 
-sub apply_utf8_filters {
-	my ($handle) = shift;
-
-#::logDebug("applying UTF-8 filters to GDBM handle");
-
-	my $out_filter = sub { $_ = encode('utf8', $_) };
-	my $in_filter  = sub { $_ = decode('utf8', $_) };
-
-	$handle->filter_store_key($out_filter);
-	$handle->filter_store_value($out_filter);
-	$handle->filter_fetch_key($in_filter);
-	$handle->filter_fetch_value($in_filter);
-
-	return $handle;
-}
-
 # Unfortunate hack need for Safe searches
 *column_index	= \&Vend::Table::Common::column_index;
 *column_exists	= \&Vend::Table::Common::column_exists;



More information about the interchange-cvs mailing list