[docs] xmldocs - docelic modified bin/stattree
docs at icdevgroup.org
docs at icdevgroup.org
Tue Sep 21 12:04:27 EDT 2004
User: docelic
Date: 2004-09-21 16:04:26 GMT
Modified: bin stattree
Log:
bin/stattree:
- Now retrieves CVS information for files (cvs rev. and last-modified date)
- We don't perform regexes and discovery inline any more, but call separate
functions that examine each line
- Don't report warning for ./configure file in pre-5.0 versions (was silly)
- format_ctx() should identify the common number of spaces in all context
lines and trim them (but doesn't appear to work right now)
Revision Changes Path
1.16 +188 -79 xmldocs/bin/stattree
rev 1.16, prev_rev 1.15
Index: stattree
===================================================================
RCS file: /var/cvs/xmldocs/bin/stattree,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- stattree 25 Aug 2004 09:53:03 -0000 1.15
+++ stattree 21 Sep 2004 16:04:26 -0000 1.16
@@ -1,12 +1,18 @@
#!/usr/bin/perl
#
-# Perl script to build Interchange source tree statistics.
+# docelic at icdevgroup.org
+#
+# Perl script to build Interchange source tree statistics,
+# contexts and symbols information.
# Data is collected and dumped using Storable, that's where
# this script's job ends.
#
# Other tools can then only read the dump to get information,
# they shouldn't parse the sources again.
#
+# Now that I learned most of the requirements, looks like this
+# script is up to the task and is pretty decent altogether
+#
use warnings;
use strict;
@@ -16,6 +22,7 @@
use Getopt::Long;
use Storable qw/nstore_fd fd_retrieve/;
use Fcntl qw/:DEFAULT :flock/;
+use File::Basename;
# Dumper behavior
$Data::Dumper::Indent = 1;
@@ -30,6 +37,7 @@
# Source contexts
my $ctx_p = 10; # How much context lines to show before
my $ctx_n = 4; # after
+my %cvsmap; # Each key (full filename) contains array: [ver, date]
# All file types should be listed here or the stats wont. It should be
# reported if an unknown file is found in the archive
@@ -87,8 +95,28 @@
`mkdir -p $cachedir/$dumpdir` unless -e "$cachedir/$dumpdir";
$dumppath .= "/.cache.bin";
-# Convenient routine to just discover all files in a module
+# Ok, onto some work now
chdir $path; # Go into the directory
+
+# Let's just pick CVS lines first:
+my @files = `find . -name Entries | cut -b 3-`;
+for my $entry ( @files ) {
+ chomp $entry;
+ my $dir;
+ ( $dir = $entry ) =~ s#/?CVS/Entries$## or
+ warn "Can't remove Entries\$ ('$dir')?\n";
+ open CVSIN, "< $entry" or warn "Can't open '$entry' ($!)\n";
+ while ( my $cvsline = <CVSIN> ) {
+ next unless $cvsline =~ s#^/##;
+ my ($fn, $ver, $time) = split /\//, $cvsline;
+
+ $path = $dir ? "$dir/$fn" : $fn;
+ $hash{revision}{"$path"} = [ $ver, $time ];
+ }
+ close CVSIN;
+}
+
+# Convenient routine to just discover all files
find({
wanted => \&wanted,
bydepth => 0,
@@ -190,22 +218,37 @@
# Open text file, parse contents
open IN, "< $file" or die "Can't open $file ($!)\n";
- my @file = <IN>;
- unshift @file, ""; # Match index numbers with line numbers
+ my @filedata = <IN>;
+ unshift @filedata, ""; # Match index numbers with line numbers
close IN;
- # Global variables to use from the loop below
- my @gfunc = (qw/unknown -1/); # 2 elements: func name, line num
+ # Global variables to use in the loop below
+ my @gfunc = (qw/unknown 0/); # 2 elements: func name, line num
+
+ # Prepare context hash so that all data is available to the
+ # line handler functions later
+ my %c = (
+ file => $file,
+ ftype => $ftype,
+ fsubtype => $fsubtype,
+ fext => $fext,
+ gfunc => \@gfunc,
+ filedata => \@filedata,
+ ctx_p => $ctx_p,
+ ctx_n => $ctx_n,
+ );
+
- for (my $lnum = 1; $lnum < scalar @file; $lnum++) {
- my $line = $file[$lnum];
+ for (my $lnum = 1; $lnum < scalar @filedata; $lnum++) {
+ $c{line} = $filedata[$lnum];
+ $c{lnum} = $lnum;
$hash{tree}{$file}{lines}++ ;
$hash{total}{lines}++ ;
# It's the main configure file, pick up the ICVERSION variable
- if ( $file eq 'configure' ) {
- for $_ (@file) {
+ if ( $c{file} eq 'configure' ) {
+ for $_ (@filedata) {
if ( /^ICVERSION=(['"])(\d+\.\d+\.\d+)\1/ ) {
if ( !$hash{version} or "$hash{version}" eq "$2" ) {
$hash{version} = $2
@@ -218,98 +261,60 @@
unless ( $hash{version} ) {
warn "Wasn't able to determine " .
'^ICVERSION=([\'"])(\d+\.\d+\.\d+)\1 from the main ' .
- "./configure script. Will use one from directory name. " .
- "(This is OK for pre-5.0 versions).\n";
+ "./configure script. Will use one from directory name ($i{ver}).\n"
+ unless $i{ver} =~ /^4/; # ignore warning for 4.x versions
$hash{version} = $i{ver};
}
# Perl program file
- } elsif ( $fsubtype eq 'perl' ) {
+ } elsif ( $c{fsubtype} eq 'perl' ) {
my $pod = 0;
- if ( $line =~ /^\s*#/ ) {
- $hash{tree}{$file}{comments}++ ;
+ if ( $c{line} =~ /^\s*#/ ) {
+ $hash{tree}{$c{file}}{comments}++ ;
$hash{total}{perl_comments}++;
# TODO check if the comment contains ::log[A-Z]\w+
- # to discover calls to logGlobal/logDebug
- } elsif ( $line =~ /\s+#/ ) {
- $hash{tree}{$file}{gray}++ ;
+ # to discover calls to logGlobal/logDebug/logError
+ } elsif ( $c{line} =~ /\s+#/ ) {
+ $hash{tree}{$c{file}}{gray}++ ;
$hash{total}{perl_gray}++;
- } elsif ( $line =~ /^=(head|over|item)/) {
- $hash{tree}{$file}{pod}++;
+ } elsif ( $c{line} =~ /^=(head|over|item)/) {
+ $hash{tree}{$c{file}}{pod}++;
$hash{total}{perl_pod}++;
$pod++;
- } elsif ( $line =~ /^=cut/) {
- $hash{tree}{$file}{pod}++;
+ } elsif ( $c{line} =~ /^=cut/) {
+ $hash{tree}{$c{file}}{pod}++;
$hash{total}{perl_pod}++;
$pod = 0;
- } elsif ( $line =~ /^\s*$/ ) {
- $hash{tree}{$file}{empty}++;
+ } elsif ( $c{line} =~ /^\s*$/ ) {
+ $hash{tree}{$c{file}}{empty}++;
$hash{total}{perl_empty}++;
} elsif ( ! $pod ) {
# The default is code
- $hash{tree}{$file}{code}++;
+ $hash{tree}{$c{file}}{code}++;
$hash{total}{perl_code}++;
- ######################################################
- # Discover pragmas
- # Ph33r, PH33R my MaD R3G3X skiLLz! ;-)
- # This matches $::Pragma->{} or $$::Pragma{}
- #if ( $line =~ /(()|\$())\$::Pragma(->\2|\3){(\w+?)}/ ) {
- if ( $line =~ /\$::Pragma->{(\w+?)}/ or
- $line =~ /\$Vend::Cfg->{Pragma}{(\w+?)}/ ) {
- #push @{ $hash{symbols}{pragma}{$5} }, <- for use with above
- push @{ $hash{symbols}{pragma}{$1} },
- # TODO Here, and 2 places below: make sure if ctx is say, 5:5,
- # it always shows that much (that is, workaround file beginning/
- # file end problems - pad with empty lines or something).
- [ "$i{ver}/$file", $lnum, "$gfunc[0]:$gfunc[1] $ctx_p\:$ctx_n", [format_ctx(@file[$lnum-$ctx_p..$lnum+$ctx_n])] ];
- }
-
- ######################################################
- # Diskover global variables
- if ( $line =~ /(()|\$())\$Global::Variable(->\2|\3){(\w+?)}/ ) {
- push @{ $hash{symbols}{globvar}{$5} },
- [ "$i{ver}/$file", $lnum, "$gfunc[0]:$gfunc[1] $ctx_p\:$ctx_n", [format_ctx(@file[$lnum-$ctx_p..$lnum+$ctx_n])] ];
- }
- # A little catch
- if ( $line =~ /\$Tag->var\s*\(\s*(["'])(\S+?)\1(\s*,\s*(\d))?/ ) {
- push @{ $hash{symbols}{globvar}{$5} },
- [ "$i{ver}/$file", $lnum, "$gfunc[0]:$gfunc[1] $ctx_p\:$ctx_n", [format_ctx(@file[$lnum-$ctx_p..$lnum+$ctx_n])] ];
- warn "\$Tag->var syntax never used in .pl/.pm files by now.
- I'll register that, but is it a mistake? ($2, $4)\n";
- }
-
- ######################################################
- # See if it's a beginning of a subroutine name, and remember the
- # name/linenum.
- if ( $line =~ m#^\s*sub\s+(\w+)\s*\{\s*$# ) {
- $hash{total}{perl_functions}++;
- @gfunc = ( $1, $lnum );
- }
+ line_findPragmas(\%c);
+ line_findGlobVars(\%c);
+ line_findFunctionName(\%c);
} else {
- warn "IMPOSSIBLE\n";
+ warn "IMPOSSIBLE case in $file\n";
}
- # TODO:
- # - parse contents, identify blocks and add to appropriate
- # counters.
- # - code with inline comments or html with tags count as "gray area"
- # (adds 1 to both)
-
##########################################################
# Found a tag
- } elsif ( $fsubtype =~ /^(user|ui|system)tag$/ ) {
+ } elsif ( $c{fsubtype} =~ /^(user|ui|system)tag$/ ) {
#$hash{total}{$fsubtype . "s"}++;
- $file =~ m#(\w+?)\.(core)?tag$# or
- warn "I know $file is a tag but regex doesn't match it\n";
+ $c{file} =~ m#(\w+?)\.(core)?tag$# or
+ warn "I know $c{file} is a tag but regex doesn't match it\n";
my %specific; # Item-specific data
my @tags; # Support multiple tags defined in the same file
# This is where we parse the tag file. We should discover settings like
# HasEndTag, Order, and so on.
- for my $_l (@file) {
+ for (my $lnum = 1; $lnum < scalar @filedata; $lnum++) {
+ my $_l = $filedata[$lnum];
next unless $_l =~ /^usertag\s/i;
my @lis = split /\s+/, $_l;
shift @lis; # Remove "Usertag" which is first in the list
@@ -319,13 +324,33 @@
my $tn = shift @lis;
my $tagopt = shift @lis;
- next if $tagopt =~ /^(routine|documentation)$/i;
- if ( "@lis" =~ /<</ ) {
- warn "TODO: Unsupported << in $tn\n";
+ next if $tagopt =~ /^documentation$/i;
+
+ # See if it's a routine and parse routine lines as usual perl lines
+ if ( $tagopt =~ /^routine$/i and "@lis" =~ /\s*<<(\S+)\s*$/i ) {
+ my $ender = $1;
+ for (my $lnum2 = $lnum; $lnum2 < scalar @filedata; $lnum2++) {
+ my $_t = $filedata[$lnum2];
+ last if $_t =~ /^$ender$/;
+
+ $c{line} = $_t;
+ $c{lnum} = $lnum2;
+
+ # For example, this should find PGP_HOME
+ line_findPragmas(\%c);
+ line_findGlobVars(\%c);
+ line_findFunctionName(\%c);
+ }
+ next;
+ }
+
+ if ( "@lis" =~ /<</ and $tagopt !~ /^routine/i ) {
+ warn "TODO: Unsupported << in $tn (in $i{ver})\n";
next;
}
- # New tag name we didn't see yet (either first run or another tag defined)
+ # New tag name we didn't see yet
+ # (either first run or another tag definition in the same file)
{ no warnings;
if (!grep{/^$tn$/}@tags and $lis[0] !~ /^alias$/i) { push @tags, $tn }
}
@@ -335,9 +360,19 @@
# For each tag found in the file, create appropriate data in hash.
for my $tagname ( @tags ) {
- push @{ $hash{symbols}{$fsubtype}{$tagname} },
- [ "$i{ver}/$file", scalar @file, "1:" . scalar @file, [ format_ctx(@file) ] ];
- $hash{specific}{$tagname} = $specific{$tagname};
+ push @{ $hash{symbols}{$c{fsubtype}}{$tagname} }, {
+ file => "$i{ver}/$c{file}",
+ lnum => scalar @filedata,
+ ctxpre => $ctx_p,
+ ctxpost => $ctx_n,
+ ctxs => 1,
+ ctxe => scalar @filedata,
+ ctx => [ format_ctx(@filedata) ] };
+
+ $hash{specific}{$tagname} = { # Append/update, don't overwrite
+ %{ $hash{specific}{$tagname} || {}},
+ %{ $specific{$tagname} || {}},
+ };
}
last;
@@ -348,6 +383,23 @@
# Format the context lines before saving to the db
sub format_ctx {
map { s/\t/ /g; chomp } @_;
+ # Find the number of starting whitespace common to all lines
+ # and trim it. XXX doesn't work
+ my $cs = 1000;
+ for my $ol ( @_ ) {
+ $ol =~ /^(\s*)/;
+ my $space_len = length $1;
+ $cs = $space_len if $space_len < $cs;
+ # Catch runaway lines (produces crapload of things, so refine
+ # search before starting to use it)
+ #if ( $ol =~ /\S\s{10,}/ ) {
+ # warn "Runaway '$ol' ?\n";
+ #}
+ }
+ if ( $cs ) { # There is something to trim
+ #print "Trimming by $cs\n"; # XXX <- should be more of those ?!
+ map { s/^\s{$cs}// or warn "LTRIM: No $cs spaces?\n" } @_
+ }
return @_;
}
@@ -366,5 +418,62 @@
close OUT or
die "Can't properly close database ($dbpath) ($!).\n";
+}
+
+# HELPERS
+
+sub line_findPragmas {
+ my %ptr = %{ (shift) };
+ # This matches $::Pragma->{} or $$::Pragma{}
+ #if ( $line =~ /(()|\$())\$::Pragma(->\2|\3){(\w+?)}/ ) {
+ if ( $ptr{line} =~ /\$::Pragma->{(\w+?)}/ or
+ $ptr{line} =~ /\$Vend::Cfg->{Pragma}{(\w+?)}/ ) {
+ #push @{ $hash{symbols}{pragma}{$5} }, <- for use with above
+ push @{ $hash{symbols}{pragma}{$1} }, {
+ # TODO Here, and 2 places below: make sure if ctx is say, 5:5,
+ # it always shows that much (that is, workaround file beginning/
+ # file end problems - pad with empty lines or something).
+ file => "$i{ver}/$ptr{file}",
+ lnum => $ptr{lnum},
+ func => ${$ptr{gfunc}}[0],
+ funclnum => ${$ptr{gfunc}}[1],
+ ctxpre => $ptr{ctx_p},
+ ctxpost => $ptr{ctx_n},
+ ctxs => $ptr{lnum} - $ptr{ctx_p},
+ ctxe => $ptr{lnum} + $ptr{ctx_n},
+ ctx => [format_ctx(@{$ptr{filedata}}[$ptr{lnum}-$ptr{ctx_p}..$ptr{lnum}+$ptr{ctx_n}])]
+ };
+ }
+}
+
+sub line_findGlobVars {
+ ######################################################
+ # Diskover global variables
+ my %c = %{ (shift) };
+ if ( ( $c{line} =~ /(()|\$())\$Global::Variable(->\2|\3){(\w+?)}/ or
+ $c{line} =~ /\$Tag->var\s*\(\s*(["'])(\S+?)\1(\s*,\s*(\d))?/ ) and $5 ) {
+ push @{ $hash{symbols}{globvar}{$5} }, {
+ file => "$i{ver}/$c{file}",
+ lnum => $c{lnum},
+ func => ${$c{gfunc}}[0],
+ funclnum => ${$c{gfunc}}[1],
+ ctxpre => $c{ctx_p},
+ ctxpost => $c{ctx_n},
+ ctxs => $c{lnum} - $c{ctx_p},
+ ctxe => $c{lnum} + $c{ctx_n},
+ ctx => [format_ctx(@{$c{filedata}}[$c{lnum}-$c{ctx_p}..$c{lnum}+$c{ctx_n}])]
+ };
+ }
+}
+
+sub line_findFunctionName {
+ ######################################################
+ # See if it's a beginning of a subroutine name, and remember the
+ # name/linenum.
+ my %c = %{ (shift) };
+ if ( $c{line} =~ m#^\s*sub\s+(\w+)\s*\{\s*$# ) {
+ $hash{total}{perl_functions}++;
+ @{ $c{gfunc} } = ( $1, $c{lnum} );
+ }
}
More information about the docs
mailing list