#!/usr/local/bin/perl
# ********************************************************************
# * COPYRIGHT:
# * © 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# * Copyright (c) 2006, International Business Machines Corporation and
# * others. All Rights Reserved.
# ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
use warnings;

# Create a new Dataset with the given data.
#
# Arguments: the class name followed by zero or more numeric samples.
# Returns a blessed hash with these fields:
#   _data  - array ref holding a private copy of the samples
#   _scale - factor applied by getMean()/getError(); starts at 1.0
#   _mean  - unscaled mean of the samples (0.0 when there are none)
#   _error - unscaled error (0.0 when there are fewer than two samples)
sub new {
    # Unpacking into @samples copies the values.  Storing \@_ instead
    # would keep aliases to the caller's variables, so a later change on
    # the caller's side would silently alter the stored data.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            #
            # NOTE(review): the error is t * s, not t * s / sqrt(n);
            # confirm that the spread of individual samples, rather than
            # a confidence interval of the mean, is what is intended.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (this is not checked here).
# Returns the scale that was set.
sub setScale {
    my ($self, $scale) = @_;
    return $self->{_scale} = $scale;
}

# Multiply the scaling factor by a value.
# Returns the new scaling factor.
sub scaleBy {
    # Named $factor rather than $a so the sort special variable is not
    # shadowed.
    my ($self, $factor) = @_;
    return $self->{_scale} *= $factor;
}

# Return the mean, multiplied by the scaling factor.
sub getMean {
    my $self = shift;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, multiplied by the
# scaling factor.  The dataset is described as getMean() +/- getError().
sub getError {
    my $self = shift;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
sub divide {
    # Works on the unscaled mean and error of both operands: the smallest
    # and largest ratios of the two intervals are computed, and the
    # result is centred between them.  The scales are divided separately.
    # Dies when the divisor interval has an endpoint at zero or when the
    # divisor's scale is zero.
    my ($self, $rhs) = @_;

    # Endpoints of the divisor interval, rhs mean +/- rhs error.
    my $rhs_high = $rhs->{_mean} + $rhs->{_error};
    my $rhs_low  = $rhs->{_mean} - $rhs->{_error};

    # These cases already died with Perl's generic "Illegal division by
    # zero"; fail with a message that names the actual problem instead.
    die "Dataset::divide: divisor interval has an endpoint at zero"
        if $rhs_high == 0 || $rhs_low == 0;
    die "Dataset::divide: divisor scale is zero"
        if $rhs->{_scale} == 0;

    my $minratio = ($self->{_mean} - $self->{_error}) / $rhs_high;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $rhs_low;

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Subtracts two Datasets and returns a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
#
# The result keeps this Dataset's scale.  The right-hand side is first
# converted to that scale, so operands with different scaling factors
# are combined correctly.  Scales are expected to be > 0.
sub subtract {
    my ($self, $rhs) = @_;

    # Factor that re-expresses rhs's unscaled values in units of self's
    # scale.  Identical scales skip the division, so the common case
    # gives exactly the same numbers as a plain subtraction.
    my $rhs_factor = $rhs->{_scale} == $self->{_scale}
                   ? 1
                   : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $rhs_factor;
    # Errors accumulate regardless of the sign of the operation.
    $result->{_error} = $self->{_error} + $rhs->{_error} * $rhs_factor;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Adds two Datasets and returns a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
sub add {
    # The result keeps this Dataset's scale.  The right-hand side is
    # first converted to that scale, so operands with different scaling
    # factors are combined correctly.  Scales are expected to be > 0.
    my ($self, $rhs) = @_;

    # Factor that re-expresses rhs's unscaled values in units of self's
    # scale.  Identical scales skip the division, so the common case
    # gives exactly the same numbers as a plain addition.
    my $rhs_factor = $rhs->{_scale} == $self->{_scale}
                   ? 1
                   : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $rhs_factor;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $rhs_factor;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Divides a dataset by a scalar.
# The new Dataset has no data points and keeps this Dataset's scale.
# Dies when the scalar is zero.
sub divideByScalar {
    my ($self, $s) = @_;

    # This case already died with Perl's generic "Illegal division by
    # zero"; fail with a message that names the actual problem instead.
    die "Dataset::divideByScalar: cannot divide by zero" if $s == 0;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    # The error is a magnitude: a negative scalar flips the sign of the
    # mean but must not produce a negative error.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiplies a dataset by a scalar.
# The new Dataset has no data points and keeps this Dataset's scale.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} * $s;
    # The error is a magnitude: a negative scalar flips the sign of the
    # mean but must not produce a negative error.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

1;