13 years ago · 1ff854fbd3
--- a/bin/mksheet.pl
+++ b/bin/mksheet.pl
@@ -0,0 +1,98 @@
 
				+#! perl -w
			
 
				+
			
 
				+## Author: Alois Mahdal at zxcvb cz
			
 
				+# Analyzer for a very primitive remote logging system.  Front-end is htlogr.pm,
			
 
				+# back-end is htlog.cgi
			
 
				+
			
 
				+# This program is free software: you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU General Public License as published by
			
 
				+# the Free Software Foundation, either version 3 of the License, or
			
 
				+# (at your option) any later version.
			
 
				+
			
 
				+# This program is distributed in the hope that it will be useful,
			
 
				+# but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+# GNU General Public License for more details.
			
 
				+
			
 
				+# You should have received a copy of the GNU General Public License
			
 
				+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+
			
 
				+use htsheet;
			
 
				+use strict;
			
 
				+use warnings;
			
 
				+
			
 
				+use Getopt::Long;
			
 
				+
			
 
				sub guess_subsets;

				# Unbuffer STDOUT so the "...OK" progress messages show up immediately.
				$| = 1;

				# Command-line settings and their defaults.
				my $LOGFILE     = "";           # --input:   log file to analyze (mandatory)
				my $STORAGE     = "split_csv";  # --storage: directory receiving the CSV files
				my $PREFIX      = "";           # --prefix:  optional prefix for CSV file names
				my @SUBSETS     = qw//;         # --subset:  patterns selecting lines within a tag

				my $USAGE = "usage: $0 --input=htlog.log [--prefix=mytest] [--storage=csv_render] [--subset=pattern]*\n";

				# GetOptions returns false when it met an unknown or malformed option (it
				# has already warned about it), so stop instead of running with settings
				# the user did not intend.
				my $options_ok = GetOptions(
				    "input=s"   => \$LOGFILE,
				    "storage=s" => \$STORAGE,
				    "prefix=s"  => \$PREFIX,
				    "subset=s"  => \@SUBSETS
				);

				unless ($options_ok and $LOGFILE) {
				    warn $USAGE;
				    exit 1;
				}
				unless (@SUBSETS) {
				    # trailing newline keeps perl from appending "at ... line N"
				    warn "no --subset(s) specified, will try to guess from tag\n";
				}
			
 
				+
			
 
				# Read the whole log file into an htsheet object.
				print "loading log...";
				my $s = htsheet->load({file => $LOGFILE});
				print "OK\n";

				# Parse every line, then collect the distinct values of the Tag field.
				# Undefined/empty tags (lines that carry no "Tag: ...;" field) are dropped,
				# because an empty tag would later select every line of the log.  The
				# list is sorted so tags are processed in the same order on every run.
				print "parsing log...";
				$s->parse_all;
				my @tags = sort
				    grep { defined($_) && length($_) }
				    @{$s->get_unique_values_of("Tag")};
				print "OK\n";
			
 
				+
			
 
				+
			
 
				# Make sure the output directory exists.  An already existing directory is
				# fine; any other failure is reported here rather than surfacing later as
				# a confusing "could not clobber" error.
				unless (-d $STORAGE) {
				    mkdir $STORAGE
				        or die "could not create directory $STORAGE: $!\n";
				}
				print "processing tags:\n";
				# take each tag and process data from it based on subsets
				# so that within tag we have control over which lines contain parseable data
				TAG: foreach my $tag (@tags) {
				    print "  $tag...";

				    # The tag is literal text taken from the log, not a pattern, so quote
				    # it before htsheet::grep interpolates it into a regex.
				    my $t = $s->grep(quotemeta $tag);

				    # Ignore undefined/empty patterns: an empty pattern would select
				    # every line and produce a file named "<tag>--.csv".
				    my @subsets_to_go =
				        grep { defined($_) && length($_) } guess_subsets($tag);
				    warn "no subsets available for tag $tag\n" unless @subsets_to_go;

				    SUBSET: foreach my $subset (@subsets_to_go) {
				        # grep down from tag to subset
				        my $subset_sheet = $t->grep($subset);
				        $subset_sheet->parse_all;

				        # save to separate CSV
				        my $fname = sprintf(
				            "%s/%s%s--%s.csv",
				            $STORAGE,
				            ($PREFIX ? "$PREFIX--" : ""),
				            $tag,
				            $subset
				        );
				        open my $fh, ">", $fname    or die "could not clobber $fname: $!\n";
				        print $fh $subset_sheet->to_csv;
				        # buffered write errors surface at close, so name the file
				        close $fh or die "could not close file $fname: $!";
				    }
				    print "OK\n";
				}
			
 
				+
			
 
				+
			
 
				# Choose the subset patterns to extract for one tag.
				#
				# An explicit --subset list (@SUBSETS) always wins; otherwise the patterns
				# are guessed from known substrings of the tag name.
				#
				#   $tag     tag name as found in the log
				#   returns  list of subset patterns; the empty list when nothing applies
				sub guess_subsets {
				    my ($tag) = @_;
				    $tag = "" unless defined $tag;

				    return @SUBSETS if @SUBSETS;
				    return qw/ rendered pmfree /    if $tag =~ m|sunspider|;
				    return qw/ avg_rr_queue /       if $tag =~ m|showlist_pl|;

				    # Explicit empty return: falling off the end would hand back the value
				    # of the last (failed) match, i.e. a one-element list holding "".
				    return;
				}
			
 
				+
			
--- a/lib/htsheet.pm
+++ b/lib/htsheet.pm
@@ -0,0 +1,181 @@
 
				+#!perl -w
			
 
				+
			
 
				+## Author: Alois Mahdal at zxcvb cz
			
 
				+# Helper class to help parse logs from a very primitive remote logging
			
 
				+# service.  Used by minions/bin/mksheet.pl to parse logs from htlog.cgi
			
 
				+
			
 
				+# This program is free software: you can redistribute it and/or modify
			
 
				+# it under the terms of the GNU General Public License as published by
			
 
				+# the Free Software Foundation, either version 3 of the License, or
			
 
				+# (at your option) any later version.
			
 
				+
			
 
				+# This program is distributed in the hope that it will be useful,
			
 
				+# but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+# GNU General Public License for more details.
			
 
				+
			
 
				+# You should have received a copy of the GNU General Public License
			
 
				+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
			
 
				+
			
 
				+
			
 
				+package htsheet;
			
 
				+use strict;
			
 
				+use warnings;
			
 
				+use Carp;
			
 
				+
			
 
				+
			
 
				##############
				# load logfile
				#
				# Constructor: reads the whole log file into memory without parsing it.
				#
				#   $class   class name the new object is blessed into
				#   $args    hash reference; mandatory key "file" is the path of the log
				#   returns  new object with {lines} filled in and {parsed} set to 0
				#   croaks   when "file" is missing or the file cannot be opened/closed
				#   carps    when the file contains no lines
				sub load {
				    my ($class, $args) = @_;

				    # make yourself
				    my $self = {};
				    $self->{data}->{main_columns}   = [ qw/Time Origin Tag I/ ];
				    $self->{parsed}                 = 0;
				    $self->{lines}                  = [];
				    $self->{file}                   = $args->{file}
				        or croak("missing mandatory option: file");

				    # slurp the file
				    open my $fh, "<", $self->{file}
				        or croak("could not open file $self->{file}: $!");
				    @{$self->{lines}} = <$fh>;
				    close $fh or croak("could not close file $self->{file}: $!");

				    # Strip line endings.  CRLF is accepted as well as LF so that a log
				    # written on another platform does not leave "\r" in the last field.
				    s/\r?\n?\z// foreach @{$self->{lines}};
				    carp("zero lines loaded") unless @{$self->{lines}};

				    # bless and go
				    bless $self, $class;
				    return $self;
				}
			
 
				+
			
 
				+
			
 
				###############################################
				# parse out common fields and message from line
				#
				#   $line    one raw log line
				#   returns  hash reference holding the head fields Time, Origin, Tag and
				#            I (undef when the line lacks them) merged with whatever
				#            parse_message() extracts from the "Message:" part; on a key
				#            clash the message data wins
				sub parse_line {
				    my ($self, $line) = @_;
				    my %row_head;

				    ($row_head{Time})   = $line
				        =~ m|Time: (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d);|;
				    ($row_head{Origin}) = $line =~ m|Origin: (\S*);|;
				    ($row_head{Tag})    = $line =~ m|Tag: (.*?);|;
				    ($row_head{I})      = $line =~ m|I: (.*?);|;

				    # Parse out Message: but do not keep the raw text in the row.  A line
				    # without a Message part is handed on as an empty message, so the
				    # message parser never sees undef.
				    my ($message) = $line =~ m|Message: (.*)$|;
				    $message = "" unless defined $message;
				    my $row_data = $self->parse_message($message);

				    # merge head and data
				    return { %row_head, %$row_data };
				}
			
 
				+
			
 
				+
			
 
				################################
				# parse data part of the message
				#
				# The message is a ";"-separated list of "key=value" fields.
				#
				#   $message  text following "Message: " (undef is treated as empty)
				#   returns   hash reference of key => value; any field that is not of
				#             the form key=value is recorded as dummy => 'non-parseable'
				#
				# Every key seen is also counted in $self->{data}->{data_columns}.
				sub parse_message {
				    my ($self, $message) = @_;
				    my %data;

				    $message = "" unless defined $message;

				    # split to fields
				    foreach my $field (split /;/, $message) {
				        # Limit of 2: only the first "=" separates key from value, so
				        # values may contain "=" themselves and may also be empty.
				        my ($key, $value) = split /=/, $field, 2;
				        if (defined($key) && length($key) && defined($value)) {
				            $data{$key} = $value;
				        }
				        else {
				            $data{dummy} = 'non-parseable';
				        }
				    }

				    # merge field names with those already found
				    $self->{data}->{data_columns}->{$_}++ foreach (keys %data);

				    return \%data;
				}
			
 
				+
			
 
				+
			
 
				#################
				# parse all lines
				#
				# Runs parse_line() over every entry of $self->{lines} and stores the
				# resulting rows in $self->{data}->{rows}.
				#
				#   returns  1 (and sets $self->{parsed} to 1)
				sub parse_all {
				    my ($self) = @_;

				    # Start from scratch so that calling parse_all again re-parses the
				    # lines instead of appending a second copy of every row.
				    $self->{data}->{rows}         = [];
				    $self->{data}->{data_columns} = {};

				    foreach my $line (@{$self->{lines}}) {
				        push @{$self->{data}->{rows}}, $self->parse_line($line);
				    }
				    return $self->{parsed} = 1;
				}
			
 
				+
			
 
				+
			
 
				###########################################
				# render (non-tabular) rows into CSV string
				#
				# The header is the main columns followed by the sorted data column
				# names; each row then yields one line with "" for columns it lacks.
				#
				#   returns  the whole CSV document as one string
				sub to_csv {
				    my ($self) = @_;

				    # make sure the rows exist before rendering them
				    $self->parse_all unless $self->{parsed};

				    # header: fixed columns first, discovered data columns after them
				    my @header = @{$self->{data}->{main_columns}};
				    push @header, sort keys %{$self->{data}->{data_columns}};

				    my @csv_lines = ( $self->array_to_csv(@header) );

				    foreach my $record (@{$self->{data}->{rows}}) {
				        my @cells = map {
				            defined $record->{$_} ? $record->{$_} : ""
				        } @header;
				        push @csv_lines, $self->array_to_csv(@cells);
				    }

				    return join "", @csv_lines;
				}
			
 
				+
			
 
				+
			
 
				################
				# CSV line maker
				#
				# Joins the given values into one CSV line: every value is wrapped in
				# double quotes, values are separated by ";" and the line ends in "\n".
				# Double quotes inside a value are doubled so the field stays intact;
				# undefined values are rendered as empty fields.
				#
				#   @_       values of one row (after the invocant)
				#   returns  the CSV line as a string
				sub array_to_csv {
				    my $self = shift;
				    my @out;
				    foreach my $value (@_) {
				        # work on a copy: elements of @_ alias the caller's variables
				        my $cell = defined $value ? $value : "";
				        $cell =~ s/"/""/g;
				        push @out, "\"$cell\"";
				    }
				    return join(";" , @out) . "\n";
				}
			
 
				+
			
 
				+
			
 
				######################################
				# get list of unique values for column
				#
				#   $column  name of the row field to inspect
				#   returns  array reference with the distinct defined values of that
				#            field, sorted as strings; rows lacking the field are skipped
				sub get_unique_values_of {
				    my ($self, $column) = @_;

				    # parse if it hasn't already been
				    $self->{parsed} or $self->parse_all;

				    # count how many times which value is seen
				    my %values;
				    foreach my $row (@{ $self->{data}->{rows} }) {
				        my $value = $row->{$column};
				        # a missing field is not a value; using undef as a hash key
				        # would warn and show up as a bogus "" entry
				        next unless defined $value;
				        $values{$value}++;
				    }

				    # sorted, so callers see the same order on every run
				    return [ sort keys %values ];
				}
			
 
				+
			
 
				+
			
 
				##################################################
				# create your clone only with lines matching regex
				#
				#   $query   regular expression (string or qr// object); an undefined or
				#            empty query matches every line
				#   returns  new, not yet parsed object of the same class holding a copy
				#            of the main column list and only the matching raw lines
				#   carps    when no line matches
				sub grep {
				    my ($self, $query) = @_;

				    # Compile once instead of once per line.  An empty pattern is spelled
				    # "(?:)" because a literally empty match pattern makes perl silently
				    # reuse the last successfully matched regex instead.
				    $query = "" unless defined $query;
				    my $pattern = length($query) ? qr/$query/ : qr/(?:)/;

				    # "grep" below is perl's builtin, not a recursive call to this method
				    my $copy = {
				        data    => { main_columns => [ @{$self->{data}->{main_columns}} ] },
				        lines   => [ grep { $_ =~ $pattern } @{$self->{lines}} ],
				        parsed  => 0,
				    };
				    carp "grep returned zero lines" unless @{$copy->{lines}};

				    bless $copy, ref $self;
				    return $copy;
				}
			
 
				+
			
 
				+1;