Friday, December 23, 2022

A Fun Geeky Project during a Bomb Cyclone

We are apparently in the midst of a Bomb Cyclone. I thought it might be fun to compare the temperatures around the state during one. Well there is a Linux app called "weather-util" and when provided with a Weather Station it reports on the current conditions like so:


terry@CloudedLeopord:~/Documents/fpc/WeatherStations$ weather-util kdsm
Current conditions at Des Moines International, IA
Last updated Dec 23, 2022 - 01:57 PM EST / 2022.12.23 1857 UTC
   Temperature: -2.9 F (-19.4 C)
   Relative Humidity: 71%
   Wind: from the WNW (300 degrees) at 29 MPH (25 KT) gusting to 40 MPH (35 KT)
   Windchill: -29 F (-34 C)
   Weather: blowing snow
   Sky conditions: overcast
First I needed to extract all the weather stations for Iowa (That's my state). So I wrote the following little program.
Program CaptureWS;
Uses SysUtils, db, sqldb, mysql80conn, StrUtils;
{$I /home/terry/Documents/fpc/MysqlConnLib.inc}
{$I /home/terry/Documents/fpc/DbConst.inc}
Type
StationType = Record
StationCode: String[10];
StationDesc: String[80];
StationLoc: String[40];
StationMetar: String[80];
StationZone: String[40];
End;
Const
KkMax=5000;

Var
WSConn: TSqlConnector;
WSTran: TsqlTransaction;
WSQry: TSqlQuery;
IFile: Text;
i, Tst, Cnt: LongInt;
StationArray: Array[1..KkMax] of StationType;
StationBuffer: StationType;
Buffer, Work: String;
Sql: AnsiString;


Begin
Assign(IFile, 'stations.txt');
Reset(IFile);
ReadLn(IFile, Buffer);
i := 1;
Repeat
If Copy(Buffer,1,1) = '[' Then
Begin
StationBuffer.StationCode := Copy(Buffer,1,6);
ReadLn(IFile, Buffer);
Work := Copy(Buffer,1,80);
StationBuffer.StationDesc := Copy(Buffer,1,80);
ReadLn(IFile, Buffer);
StationBuffer.StationLoc := Copy(Buffer,1,40);
ReadLn(IFile, Buffer);
StationBuffer.StationMetar := Copy(Buffer,1,80);
ReadLn(IFile, Buffer);
StationBuffer.StationZone := Copy(Buffer,1,80);
Tst := Pos(', IA,', Work );
If Tst > 0 Then
Begin
StationArray[i] := StationBuffer;
i := i + 1;
End;
End;
ReadLn(IFile, Buffer);
Until Eof(IFile);
Cnt := i - 1;
WriteLn;

WSConn := CreateConnection('MySQL 8.0', '127.0.0.1', 'WeatherStation', MyUser, MyPass);
CreateTransaction(WSConn, WSTran);
WSQry := GetQuery(WSConn, WSTran);
WSConn.Open;
For i := 1 To Cnt Do
Begin
Sql := 'Insert Into WeatherStation(WSCode, WSDesc, WSLoc, WSMetar, WSZone) Values (' +
QuotedStr(StationArray[i].StationCode) + ', ' +
QuotedStr(StationArray[i].StationDesc) + ', ' +
QuotedStr(StationArray[i].StationLoc) + ', ' +
QuotedStr(StationArray[i].StationMetar) + ', ' +
QuotedStr(StationArray[i].StationZone) + ')';
WSQry.Sql.Clear;
WSQry.Sql.Add(Sql);
WSQry.ExecSql;
WSTran.Commit;
End;
WSConn.Close;
WSQry.Free;
WSConn.Free;
WSTran.Free;
End.

I am using a free compiler here called Free Pascal, it is relatively fast. It extracts the data from a text file (called stations.txt) that I found on the Interwebs. The data looks like the following:

[k1p1]
description = Plymouth, Plymouth Municipal Airport, NH, United States
location = (0.7640906, -1.2523368)
metar = http://tgftp.nws.noaa.gov/data/observations/metar/decoded/K1P1.TXT
zone = ('nhz005', 0.0023109)

[k1r7]
description = Brookhaven, Ms, US
location = (0.5515240, -1.5781267)
metar = http://tgftp.nws.noaa.gov/data/observations/metar/decoded/K1R7.TXT
zone = ('msz062', 0.0012836)

[k1s5]
description = Sunnyside Muni, Sunnyside, WA, United States of America
location = (0.8085601, -2.0938778)
metar = http://tgftp.nws.noaa.gov/data/observations/metar/decoded/K1S5.TXT
zone = ('waz027', 0.0045105)

So when I find a square left bracket in column one, I know I am on the first line of a new data item and I save that line and the next three lines to a data structure.  If the 2nd line of the data item contains a ", IA,", I know it is an Iowa Weather Station and the new data structure gets saved to an array.  Finally, I save all the data records in the array to a MySQL table for easy access.

Once I got the Weather Stations into a MySql table, I wrote another program to calculate the average and Standard Deviation for all of the Iowa Weather Stations.

Program ListIAWeather;
Uses Classes, SysUtils, db, sqldb, mysql80conn, StrUtils, Process, Math;
{$I /home/terry/Documents/fpc/MysqlConnLib.inc}
{$I /home/terry/Documents/fpc/DbConst.inc}

Type
	DblArrayType = Array[1..5000] of Double;

		
Var
	WSConn:					TSqlConnector;
	WSTran: 					TsqlTransaction;
	WSQry:					TSqlQuery;
	CityCode:					String;
	WeatherP: 					TProcess;
  	WeatherSL: 					TStringList;
  	i, j, k:					LongInt;
  	Temp, WChill:				DblArrayType;
  	AvgTemp, AvgWChill, StdTemp, 
  	StdWChill:					Double;
  	Work:						String;
  	
Function Avg(a: DblArrayType; n: LongInt): Double;
Var
	d, Cnt:	Double;
	i:		LongInt;
Begin
	d := 0; Cnt := n;
	For i := 1 To n Do
		d := d + a[i];
	Avg := d / Cnt;
End;

Function PopStd(a: DblArrayType; n: LongInt; AV: Double): Double;
Var
	d, Cnt:		Double;
	i:			LongInt;
Begin
	d := 0; Cnt := n;
	For i := 1 To n Do 
		d := d + Sqr(AV - a[i]);
	PopStd := Sqrt(d / Cnt);	
End;
	
Begin
	WSConn := CreateConnection('MySQL 8.0', '127.0.0.1', 'WeatherStation', MyUser, MyPass);
	CreateTransaction(WSConn, WSTran);
	WSQry := GetQuery(WSConn, WSTran);
	
	WSQry.SQL.Text := 'Select WSCode from WeatherStation;';
	WSConn.Open;
	WSQry.Open;
	WeatherP := TProcess.Create(nil);
	WeatherSL := TStringList.Create;
	WeatherP.Executable := 'weather-util';
	WeatherP.Options := WeatherP.Options + [poWaitOnExit, poUsePipes]; 
	j := 1; k := 1;
	If WSConn.Connected Then
		Repeat
			CityCode := Copy(WSQry.FieldByName('WSCode').AsString,2,4);
			WeatherP.Parameters.Clear; 
			WeatherP.Parameters.Add(CityCode); 
			WeatherP.Execute;
			WeatherSL.LoadFromStream(WeatherP.Output);
			For i := 0 To WeatherSL.Count-1 Do 
			Begin
				WriteLn(WeatherSL[i]);
				Work := WeatherSL[i];
				If Pos('Temperature', Work) > 0 Then
				Begin
					Temp[j] := StrToFloat(ExtractWord(2, Work, [' ']));
					j := j + 1;
				End;
				If Pos('Windchi', Work) > 0 Then
				Begin
					WChill[k] := StrToFloat(ExtractWord(2, Work, [' ']));
					k := k + 1;
				End;
			End;
			WriteLn;
			WSQry.Next
		Until WSQry.Eof;
	WSQry.Close;
	WSConn.Close;
	WSQry.Free;
	WSConn.Free;
	WSTran.Free;
	j := j - 1;
	k := k - 1;
	AvgTemp := Avg(Temp, j);
	StdTemp := PopStd(Temp, j, AvgTemp);
	
	AvgWChill := Avg(WChill, k);
	StdWChill := PopStd(WChill, k, AvgWChill);
	
	WriteLn('Avg. Temp: ', AvgTemp:6:2, '     Std: ', StdTemp:6:2);
	WriteLn('Avg. WChill: ', AvgWChill:6:2, '     Std: ', StdWChill:6:2);
End.

Ok, this program gets the weather station name from the mysql table and executes "weather-util" in the background.  The output from that program is captured and put in a TStringList(a sort of an array of strings) and from that each line is analyzed.  Each temperature and windchill are entered into an array.  Finally, each array the average and standard deviation is calculated.  

Standard Deviation is a number representing the spread of data items and is heavily used in business and science.  One deviation from mean should cover 68% of data items, 2 deviations should cover 95% and three deviations should cover 99.97% of all data items.


On 12/22 I got this result:

Avg. Temp:  -7.98     Std:   5.53
Avg. WChill: -33.43     Std:   7.42

So across the state 68 percent of the sites should have a temperature -13.51 to -2.45.

On 12/23, I reran it and got the following:

Avg. Temp:  -2.11     Std:   2.90
Avg. WChill: -27.81     Std:   4.47
 

A muuch smaller variation of the data. So the temperature was much more similar across the whole state than yesterday.

Saturday, December 03, 2022

Fun with The Normal Distribution Curve

 I came across on the web that for 2022, the Mean (the statistics word meaning average) and the standard deviation for household income in United States was $73,000 and $30,000 respectively.  Standard Deviation  is a formula to show the spread of numbers clustered around the average.  It is heavily relied on in statistics, business and science.  Those two numbers above are approximate numbers, the web site I looked at only published the approximate numbers. This standard deviation grouping forms a curve known as Normal Distribution Curve or more commonly known as the Bell Curve.  Warning, not all groups of random numbers form in this bell curve.  The easiest test to determine if it has formed a bell curve is to check if approximatly 68% of your data falls within plus or minus one standard deviation of mean, 95% falls within plus or minus 2 deviations of mean and 99.7% falls within plus or minus 3 deviations of mean.

What is this Standard Deviation thing good for anyway.  Well if you have a begin and end point, you can determine what percent of your population falls between those numbers.   To do this, one has to convert the two numbers into z variables using the formula

z=(x-Mean)/STD

Mean of course is the average and STD is the Standard Deviation. And then historically one would look these values up in a table and then to get it in a percent, we would have to multiply it by one hundred. 

Now some time ago I wrote some routines to calculate these percents in Pascal and I put them in my own personal library.  I used the hand method (I guess that is the only way to do it with the Bell Curve) which basically means that I broke the area up under the curve into a bunch of rectangles and calculated those areas, but to improve the results I made little triangles at the top of each rectangle to grab most of the curved area above each rectangle.  I have run tests and am getting fairly accurate numbers out of my function.  It's actually not that much source code:

Function Zee(std, Mean, Val: Double): Double;
Begin
	Zee := (Val - Mean) / std;
End;

Function CalculateHeight(sv, lv, x: Double): Double;
Begin
   CalculateHeight := sv * Exp(lv * 0.5 * x * x);
end;

Function BellCurveArea(BeginVal, EndVal: Double): Double;
{
         This function calculates the area under
         the Bell Curve using the hand method.
         The line for the curve is equal to:

         (1/(2 * Pi)^.5) * e^(1/x^2)
}
Var
   x, y1, y2, YVal, Sum, SqrtVal, LogVal: Double;
Begin
   SqrtVal := 1.0 / Sqrt(2.0 * Pi());
   LogVal  := Ln(1.0 / Exp(1));
   Sum := 0; x := BeginVal;
   Repeat
      y1 := CalculateHeight(SqrtVal, LogVal, x);
      y2 := CalculateHeight(SqrtVal, LogVal, x + 0.01);
      If (y1>=y2) Then
         YVal := y2
      Else YVal := y1;
      Sum := Sum + YVal * 0.01;
      // This adds the triangle .5(h * w)
      // Width = .01 and multiply that by .5 = .005
      Sum := Sum + Abs(y2-y1) * 0.005;
      x := x + 0.01;
   Until (x > EndVal);
   BellCurveArea := Sum;
End; 

Function RPad(s: String; n: LongInt): String;
Var
	i:					LongInt;
Begin
	If Length(s) < n Then
		For i := Length(s) To n Do
			s := s + ' ';
	RPad := s;
End;

Now for the fun.  To take the Mean and  the Standard Deviation and get some numbers (and Graphs) out of it.  First I wrote a little Lazarus program to produce a graph:

The source code to produce this isn't very difficult, all I did was create some arrays:

HighArray: Array[1..26] of Double = (10000, 20000, 30000, 40000, 50000, 60000,
70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000,
150000, 160000, 170000, 180000, 190000, 200000, 210000, 220000,
230000, 240000, 250000, 1e6);
LowArray: Array[1..26] of Double = (0, 10000.01, 20000.01, 30000.01, 40000.01,
50000.01, 60000.01, 70000.01, 80000.01, 90000.01, 100000.01,
110000.01, 120000.01, 130000.01, 140000.01, 150000.01, 160000.01,
170000.01, 180000.01, 190000.01, 200000.01, 210000.01, 220000.01,
230000.01, 240000.01, 250000.01);
NameArray: Array[1..26] of String = ('0-10k', '10k-20k', '20k-30k', '30k-40k',
'40k-50k', '50k-60k', '60k-70k', '70k-80k', '80k-90k', '90k-100k',
'100-110k', '110k-120k', '120k-130k', '130k-140k', '140k-150k',
'150k-160k', '160k-170k', '170k-180k', '180k-190k', '190k-200k',
'200-210k', '210k-220k', '220k-230k', '230k-240k', '240k-250k',
'250k-One Mill.');

And then the following procedure was added:

procedure TForm1.CalcButClick(Sender: TObject);
Var
i: LongInt;
Mean, Std, Pct: Double;
begin
For i := Chart1BarSeries1.Count Downto 1 Do
Chart1BarSeries1.Delete(i-1);
Mean := StrToFloat(MeanEdit.Text);
Std := StrToFloat(StdEdit.Text);
For i := 1 To 26 Do
Begin
Pct := BellCurveArea(Zee(Std, Mean, LowArray[i]),
Zee(Std, Mean, HighArray[i]));
Pct := Pct * 100;
Chart1BarSeries1.Add(Pct,NameArray[i],clred);
end;

end;


Now some people including me like to see the actual numbers:

Mean: $73,000.00
STD:  $30,000.00
Household Earnings             Percent
0-10k                          1.06798031%
10k-20k                        2.13399700%
20k-30k                        3.81964654%
30k-40k                        6.12427552%
40k-50k                        8.79611544%
50k-60k                        11.31706203%
60k-70k                        13.04320719%
70k-80k                        13.46615998%
80k-90k                        12.45408995%
90k-100k                       10.31783691%
100-110k                       7.65724858%
110k-120k                      5.09052615%
120k-130k                      3.03149098%
130k-140k                      1.61715203%
140k-150k                      0.77275731%
150k-160k                      0.33077244%
160k-170k                      0.12682508%
170k-180k                      0.04355779%
180k-190k                      0.01340004%
190k-200k                      0.00369250%
200-210k                       0.00091139%
210k-220k                      0.00020149%
220k-230k                      0.00003990%
230k-240k                      0.00000708%
240k-250k                      0.00000112%
250k-One Mill.                 0.00000018%

I generated that with the following simple little pascal program, the curly brackets with the $I just pull in my personal library.


Program HouseHoldInc2022;
Uses SysUtils, DateUtils;
Const
HighArray: Array[1..26] of Double = (10000, 20000, 30000, 40000, 50000, 60000,
70000, 80000, 90000, 100000, 110000, 120000, 130000, 140000,
150000, 160000, 170000, 180000, 190000, 200000, 210000, 220000,
230000, 240000, 250000, 1e6);
LowArray: Array[1..26] of Double = (0, 10000.01, 20000.01, 30000.01, 40000.01,
50000.01, 60000.01, 70000.01, 80000.01, 90000.01, 100000.01,
110000.01, 120000.01, 130000.01, 140000.01, 150000.01, 160000.01,
170000.01, 180000.01, 190000.01, 200000.01, 210000.01, 220000.01,
230000.01, 240000.01, 250000.01);
NameArray: Array[1..26] of String = ('0-10k', '10k-20k', '20k-30k', '30k-40k',
'40k-50k', '50k-60k', '60k-70k', '70k-80k', '80k-90k', '90k-100k',
'100-110k', '110k-120k', '120k-130k', '130k-140k', '140k-150k',
'150k-160k', '160k-170k', '170k-180k', '180k-190k', '190k-200k',
'200-210k', '210k-220k', '220k-230k', '230k-240k', '240k-250k',
'250k-One Mill.');
{$I /home/terry/Documents/fpc/MyLibrary.inc}

Var
i: LongInt;
Pct, Mean, Std: Double;


Begin
WriteLn;
Mean := 7.3e4;
Std := 3.0e4;
WriteLn('Mean: ', '$73,000.00');
WriteLn('STD: ', '$30,000.00');
WriteLn(RPad('Household Earnings', 30), 'Percent');
For i := 1 To 26 Do
Begin
Pct := BellCurveArea(Zee(Std, Mean, LowArray[i]),
Zee(Std, Mean, HighArray[i]));
Pct := Pct * 100;
WriteLn(RPad(NameArray[i], 30), Pct:6:8, '%');
End;
End.