12e5b6d6dSopenharmony_ci#!/usr/local/bin/perl
22e5b6d6dSopenharmony_ci#  ********************************************************************
32e5b6d6dSopenharmony_ci#  * COPYRIGHT:
42e5b6d6dSopenharmony_ci#  * © 2016 and later: Unicode, Inc. and others.
52e5b6d6dSopenharmony_ci#  * License & terms of use: http://www.unicode.org/copyright.html
62e5b6d6dSopenharmony_ci#  * Copyright (c) 2006, International Business Machines Corporation and
72e5b6d6dSopenharmony_ci#  * others. All Rights Reserved.
82e5b6d6dSopenharmony_ci#  ********************************************************************
92e5b6d6dSopenharmony_ci
102e5b6d6dSopenharmony_cipackage Dataset;
112e5b6d6dSopenharmony_ciuse Statistics::Descriptive;
122e5b6d6dSopenharmony_ciuse Statistics::Distributions;
132e5b6d6dSopenharmony_ciuse strict;
142e5b6d6dSopenharmony_ci
# Create a new Dataset from a list of numeric samples.
#
#   my $ds = Dataset->new(@samples);
#
# The constructor stores the samples and precomputes:
#   _mean   the arithmetic mean (0.0 when there are no samples)
#   _error  the half-width of a two-sided 99% interval, computed as the
#           t critical value for n-1 degrees of freedom (upper tail 0.005)
#           multiplied by the standard deviation reported by
#           Statistics::Descriptive (0.0 when there are fewer than 2 samples)
#   _scale  a multiplier applied by getMean()/getError(); starts at 1.0
#
# Returns the new blessed object.
sub new {
    # Copy the arguments instead of keeping a reference to @_: the elements
    # of @_ are aliases of the caller's variables, so a stored \@_ would
    # change underneath us whenever the caller later modified its own data.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;

    # No samples: mean and error keep their 0.0 defaults.
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@samples);
    $self->{_mean} = $stats->mean();

    if ($n >= 2) {
        # Use a t distribution rather than Gaussian because (a) we
        # assume an underlying normal dist, (b) we do not know the
        # standard deviation -- we estimate it from the data, and (c)
        # we MAY have a small sample size (also works for large n).
        my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
        $self->{_error} = $t * $stats->standard_deviation();
    }

    return $self;
}
442e5b6d6dSopenharmony_ci
# Replace the scaling factor applied by getMean() and getError().
# A factor of 1.0 leaves the values unscaled.  The factor is expected to
# be > 0; this method does not check it.
# Returns the factor that was stored.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
512e5b6d6dSopenharmony_ci
# Multiply the current scaling factor by the given value, in place.
# Returns the updated scaling factor.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} *= $multiplier;
}
572e5b6d6dSopenharmony_ci
# Return the stored mean multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};
    return $mean * $scale;
}
632e5b6d6dSopenharmony_ci
# Return the stored error multiplied by the current scaling factor.
# For a Dataset built from samples this is the 99% error derived from
# the t distribution, so the dataset reads as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my ($error, $scale) = @{$self}{qw(_error _scale)};
    return $error * $scale;
}
702e5b6d6dSopenharmony_ci
# Divide this Dataset by another one and return the quotient as a new
# Dataset that carries a mean+/-error but no data points.
#
# The smallest ratio is (mean - error) of $self over (mean + error) of
# $rhs, the largest ratio is (mean + error) of $self over (mean - error)
# of $rhs.  The result's mean is the midpoint of those two ratios and its
# error is the distance from that midpoint to the smallest ratio.  The
# result's scale is the quotient of the two scales.
#
# No check is made for a zero denominator; Perl's own division-by-zero
# error applies in that case.
sub divide {
    my ($self, $rhs) = @_;

    my ($num_mean, $num_err) = @{$self}{qw(_mean _error)};
    my ($den_mean, $den_err) = @{$rhs}{qw(_mean _error)};

    my $lowest   = ($num_mean - $num_err) / ($den_mean + $den_err);
    my $highest  = ($num_mean + $num_err) / ($den_mean - $den_err);
    my $midpoint = ($lowest + $highest) / 2;

    my $quotient = Dataset->new();
    $quotient->{_mean}  = $midpoint;
    $quotient->{_error} = $midpoint - $lowest;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $quotient;
}
882e5b6d6dSopenharmony_ci
# Subtract another Dataset from this one and return the difference as a
# new Dataset that carries a mean+/-error but no data points.
#
# The result uses $self's scale.  $rhs is first converted into $self's
# units (multiplied by rhs scale / self scale), so that
#   result->getMean()  == self->getMean()  - rhs->getMean()
#   result->getError() == self->getError() + rhs->getError()
# also holds when the two operands have different scales.  With equal
# scales the conversion factor is 1 and the raw values are combined
# directly.  Errors add, because the uncertainties of both operands
# contribute to the difference.
sub subtract {
    my ($self, $rhs) = @_;

    # Factor that re-expresses $rhs's raw values in $self's units.  A zero
    # scale on $self cannot be converted to, so the raw values are used.
    my $ratio = 1;
    if ($self->{_scale} != $rhs->{_scale} && $self->{_scale} != 0) {
        $ratio = $rhs->{_scale} / $self->{_scale};
    }

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $ratio;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $ratio;
    $result->{_scale} = $self->{_scale};
    return $result;
}
1012e5b6d6dSopenharmony_ci
# Add another Dataset to this one and return the sum as a new Dataset
# that carries a mean+/-error but no data points.
#
# The result uses $self's scale.  $rhs is first converted into $self's
# units (multiplied by rhs scale / self scale), so that
#   result->getMean()  == self->getMean()  + rhs->getMean()
#   result->getError() == self->getError() + rhs->getError()
# also holds when the two operands have different scales.  With equal
# scales the conversion factor is 1 and the raw values are combined
# directly.
sub add {
    my ($self, $rhs) = @_;

    # Factor that re-expresses $rhs's raw values in $self's units.  A zero
    # scale on $self cannot be converted to, so the raw values are used.
    my $ratio = 1;
    if ($self->{_scale} != $rhs->{_scale} && $self->{_scale} != 0) {
        $ratio = $rhs->{_scale} / $self->{_scale};
    }

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $ratio;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $ratio;
    $result->{_scale} = $self->{_scale};
    return $result;
}
1142e5b6d6dSopenharmony_ci
# Divide this Dataset's mean and error by a plain number and return the
# outcome as a new Dataset with the same scale and no data points.
sub divideByScalar {
    my ($self, $divisor) = @_;

    my $quotient = Dataset->new();
    @{$quotient}{qw(_mean _error)} =
        map { $self->{$_} / $divisor } qw(_mean _error);
    $quotient->{_scale} = $self->{_scale};
    return $quotient;
}
1272e5b6d6dSopenharmony_ci
# Multiply this Dataset's mean and error by a plain number and return the
# outcome as a new Dataset with the same scale and no data points.
sub multiplyByScalar {
    my ($self, $factor) = @_;

    my $product = Dataset->new();
    @{$product}{qw(_mean _error)} =
        map { $self->{$_} * $factor } qw(_mean _error);
    $product->{_scale} = $self->{_scale};
    return $product;
}
1402e5b6d6dSopenharmony_ci
1412e5b6d6dSopenharmony_ci1;
142