Skip to content

Commit a459fb7

Browse files
authored
Merge pull request #27 from lawson89/main
initial checkin
2 parents 11c6ff6 + 92ab4c8 commit a459fb7

File tree

4 files changed

+529
-0
lines changed

4 files changed

+529
-0
lines changed

entries/rlawson/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Richard Lawson
2+
3+
An Entry to the One Billion Row Challenge in Object Pascal.
4+
5+
Approach.
6+
7+
- Sequentially read the measurement file, splitting into chunks of 64k.
8+
- Populate a `TFPHashList`, keyed by station name, with the running min, max, count and sum, without storing the individual temperature measurements.
9+
- Use a custom comparer to sort the station and temperature statistics in a `TStringList`.
10+
- Display the sorted measurements using a simple for loop.
11+
12+
## Getting Started
13+
14+
### Dependencies
15+
16+
* None beyond the latest Free Pascal Compiler and Lazarus. You can get both easily by using [`https://github.com/LongDirtyAnimAlf/fpcupdeluxe/releases`](https://github.com/LongDirtyAnimAlf/fpcupdeluxe/releases).
17+
18+
### Compiling
19+
20+
* Open `weather.lpi` using Lazarus.
21+
* Hit `Ctrl + F9` to compile.
22+
23+
### Running the executable
24+
25+
```bash
26+
$ ./weather -i your_measurements.txt
27+
```
28+
29+
To time the execution, do the following.
30+
31+
```bash
32+
$ time ./weather -i your_measurements.txt
33+
```
34+
35+
## Authors
36+
37+
Richard Lawson
38+
[lawson89](https://github.com/lawson89)
39+
40+
## Version History
41+
42+
* 1.0
43+
* Initial Release - Sequential approach.
44+
45+
## License
46+
47+
This project is licensed under the MIT License - see the LICENSE.md file for details.
48+
49+
## Acknowledgments
50+
51+
Inspiration, code snippets, etc.
52+
53+
1. The FPC team, Lazarus team, fpcupdeluxe team, and other contributors.
54+
- For providing a usable programming language and a usable ecosystem.
55+
2. Gustavo 'Gus' Carreno.
56+
- For making this happen.
57+
- Borrowed Gus' approach to use `TCustomApplication` and using `unit`s properly
58+
to make main code more readable.
59+
- Borrowed and modified Gus' `WriteHelp` from the `baseline.lpr`.
60+
3. Székely Balázs.
61+
- I borrowed the custom `TStringList` comparer from the `baseline` program.
62+
4. Iwan Kelaiah.
63+
- I borrowed the README.md and output generation code.

entries/rlawson/src/parser.pas

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
unit parser;
2+
3+
{$mode ObjFPC}{$H+}
4+
5+
interface
6+
7+
uses
8+
Classes, SysUtils, bufstream, Contnrs, Math;
9+
10+
procedure ReadMeasurements(inputFile: string);
11+
12+
type
13+
// Pointer to a heap-allocated TCityTemp; this is what gets stored in the hash list.
PTCityTemp = ^TCityTemp;
14+
15+
// Running statistics for one station. Temperatures are kept as integers in
// tenths of a degree (the parser folds the single decimal digit into the value).
TCityTemp = record
16+
city: string;          // station name, as read from the input
17+
max: int64;            // highest reading seen so far (tenths of a degree)
18+
min: int64;            // lowest reading seen so far (tenths of a degree)
19+
total: int64;          // sum of all readings (tenths of a degree)
20+
numReadings: integer;  // number of readings folded into total
21+
end;
22+
23+
24+
implementation
25+
26+
// Tunables and character constants used by the chunked reader and the record parser.
const
27+
READ_SIZE = 65536;  // number of bytes requested from the stream per read
28+
// read size plus enough to hold one record if we split it
29+
BUFFER_SIZE = READ_SIZE + 1024;
30+
REC_SEP: char = ';';  // separates the station name from the temperature
31+
LF: char = chr(10);  // terminates each record
32+
ANSI_ZERO: integer = 48;  // subtracted from a digit's byte value to get its numeric value
33+
DECIMAL_POINT: char = '.';  // sits between the integer part and the single fraction digit
34+
NEGATIVE_SIGN: char = '-';  // optional leading sign of a temperature
35+
36+
{ Parses every record in buffer[0 .. bufferLength - 1] and folds it into results.

  Each record has the form <station>;<temperature><LF>, where the temperature
  is an optional '-' followed by one or two integer digits, a '.', and exactly
  one fraction digit. Temperatures are accumulated as integers in tenths of a
  degree, so no floating-point work happens while parsing.

  Parameters:
    buffer       - raw file bytes; the caller must make sure the last byte
                   inside bufferLength is an LF, because the inner scans do
                   not bounds-check (this is the hot loop).
    bufferLength - number of valid bytes in buffer.
    results      - hash list keyed by station name; each item is a PTCityTemp
                   allocated here with New and owned by the list's user.

  If a parsed value falls outside -999 .. 999 (i.e. beyond two integer digits
  and one fraction digit) a diagnostic is printed and the rest of the buffer
  is abandoned. }
procedure ProcessMeasurements(var buffer: array of char; bufferLength: integer;
  var results: TFPHashList);
var
  idx: integer = 0;
  sign: integer;
  temp, cityStart: integer;
  city: shortstring;
  reading: PTCityTemp;
begin
  while idx < (bufferLength - 1) do
  begin
    { station name: everything up to the separator }
    cityStart := idx;
    while buffer[idx] <> REC_SEP do
      Inc(idx);
    SetString(city, @buffer[cityStart], idx - cityStart);
    Inc(idx); // step over the ';'

    { optional sign }
    sign := 1;
    if buffer[idx] = NEGATIVE_SIGN then
    begin
      sign := -1;
      Inc(idx);
    end;

    { look ahead: a decimal point two characters away means two integer digits }
    temp := 0;
    if buffer[idx + 2] = DECIMAL_POINT then
    begin
      temp := 100 * (byte(buffer[idx]) - ANSI_ZERO);
      Inc(idx);
    end;
    temp := temp + 10 * (byte(buffer[idx]) - ANSI_ZERO);
    idx := idx + 2; // skip the units digit and the decimal point
    temp := sign * (temp + (byte(buffer[idx]) - ANSI_ZERO));

    { sanity check in both directions: the format cannot encode a magnitude
      above 99.9, so anything larger means the buffer is not what we expect }
    if Abs(temp) > 999 then
    begin
      WriteLn('Somethign wrong!', city, ' | ', temp, ' | ', buffer[idx - 3],
        ' | ', buffer[idx - 2],
        ' | ', buffer[idx - 1], ' | ', buffer[idx]);
      break;
    end;

    { advance to the start of the next record }
    while buffer[idx] <> LF do
      Inc(idx);
    Inc(idx);

    { fold the reading into the per-station statistics }
    reading := results.Find(city);
    if reading = nil then
    begin
      reading := New(PTCityTemp);
      reading^.city := city;
      reading^.max := temp;
      reading^.min := temp;
      reading^.numReadings := 1;
      reading^.total := temp;
      results.Add(city, reading);
    end
    else
    begin
      reading^.total := reading^.total + temp;
      if temp > reading^.max then
        reading^.max := temp;
      if temp < reading^.min then
        reading^.min := temp;
      reading^.numReadings := reading^.numReadings + 1;
    end;
  end;
end;
114+
115+
{ Rounds x to the nearest whole number, with exact halves going towards
  positive infinity (2.5 -> 3, -2.5 -> -2). A zero result is always returned
  as the literal 0.0. }
function PascalRound(x: double): double;
var
  whole: double;
  isNegativeHalf: boolean;
begin
  whole := Trunc(x);

  // a negative value sitting exactly on .5 is already rounded "up" by Trunc
  isNegativeHalf := (x < 0.0) and (whole - x = 0.5);

  // otherwise move one step away from zero once the fraction reaches a half
  if (not isNegativeHalf) and (Abs(x - whole) >= 0.5) then
    whole := whole + Math.Sign(x);

  Result := whole;
  if whole = 0.0 then
    Result := 0.0;
end;
135+
136+
137+
{ Rounds x to one decimal place by scaling up by ten, rounding with
  PascalRound, and scaling back down. }
function RoundEx(x: double): double;
var
  tenths: double;
begin
  tenths := PascalRound(x * 10.0);
  Result := tenths / 10.0;
end;
141+
142+
{ Sort callback for TStringList.CustomSort.

  Orders two "name=value" entries by the text in front of the first '='
  using a case-sensitive comparison (CompareStr). If either entry has no '=',
  the two are reported as equal (0). }
function Compare(AList: TStringList; AIndex1, AIndex2: integer): integer;
var
  LeftEntry, RightEntry: string;
  LeftSep, RightSep: integer;
begin
  LeftEntry := AList[AIndex1];
  RightEntry := AList[AIndex2];
  LeftSep := Pos('=', LeftEntry);
  RightSep := Pos('=', RightEntry);

  // without a separator on both sides there is no key to compare
  if (LeftSep <= 0) or (RightSep <= 0) then
    Exit(0);

  Result := CompareStr(Copy(LeftEntry, 1, LeftSep - 1),
    Copy(RightEntry, 1, RightSep - 1));
end;
159+
160+
{ Prints the aggregated statistics to standard output as a single line:
    {Station=min/mean/max, Station=min/mean/max, ...}
  with stations sorted by name and every value shown with one decimal.

  Parameters:
    results - hash list whose items are PTCityTemp records holding values in
              tenths of a degree.

  Side effect: every record in results is released with Dispose, so the
  pointers left in the list are dangling once this procedure returns. The
  caller must only free the list afterwards, never dereference its items. }
procedure DumpMeasurements(results: TFPHashList);
var
  i: integer;
  reading: PTCityTemp;
  readingStr: string;
  minTemp, maxTemp, meanTemp: double;
  weatherStationList: TStringList;
begin
  weatherStationList := TStringList.Create;
  try
    for i := 0 to results.Count - 1 do
    begin
      reading := results.Items[i];
      // stored values are tenths of a degree; convert back before formatting
      minTemp := RoundEx(reading^.min / 10);
      maxTemp := RoundEx(reading^.max / 10);
      meanTemp := RoundEx(reading^.total / reading^.numReadings / 10);
      readingStr := reading^.city + '=' + FormatFloat('0.0', minTemp) +
        '/' + FormatFloat('0.0', meanTemp) + '/' + FormatFloat('0.0', maxTemp);
      {$IFDEF DEBUG}
      readingStr := reading^.city + '=' + FormatFloat('0.0', minTemp) +
        '/' + FormatFloat('0.0', meanTemp) + '/' + FormatFloat('0.0', maxTemp) +
        '/' + IntToStr(reading^.total) + '/' + IntToStr(reading^.numReadings);
      {$ENDIF}
      weatherStationList.Add(readingStr);
      Dispose(reading);
    end;

    weatherStationList.CustomSort(@Compare);

    Write('{');
    for i := 0 to weatherStationList.Count - 1 do
    begin
      // entries after the first are separated by a comma and a space
      if i > 0 then
        Write(', ');
      Write(weatherStationList[i]);
    end;
    WriteLn('}');
  finally
    // the list was previously never released
    weatherStationList.Free;
  end;
end;
203+
204+
205+
{ Writes a report for the exception currently being handled - class name and
  message (when E is assigned) followed by the back trace of the raise address
  and of every recorded exception frame - and then terminates the program
  with Halt. }
procedure DumpExceptionCallStack(E: Exception);
var
  FrameIdx: integer;
  FrameList: PPointer;
  Dump: string;
begin
  Dump := 'Program exception! ' + LineEnding + 'Stacktrace:' +
    LineEnding + LineEnding;

  if Assigned(E) then
    Dump := Dump + 'Exception class: ' + E.ClassName + LineEnding +
      'Message: ' + E.Message + LineEnding;

  // address where the exception was raised, then each caller frame
  Dump := Dump + BackTraceStrFunc(ExceptAddr);
  FrameList := ExceptFrames;
  for FrameIdx := 0 to ExceptFrameCount - 1 do
    Dump := Dump + LineEnding + BackTraceStrFunc(FrameList[FrameIdx]);

  WriteLn(Dump);
  Halt; // End of program execution
end;
225+
226+
227+
{ Reads the measurement file in READ_SIZE chunks, aggregates per-station
  statistics and prints them via DumpMeasurements.

  Each chunk is cut at its last LF so that only complete records are handed
  to ProcessMeasurements; the bytes after that LF are carried over and
  prepended to the next chunk.

  Parameters:
    inputFile - path of the measurement file to read.

  Any exception is reported together with a stack trace, after which the
  program is halted by DumpExceptionCallStack. }
procedure ReadMeasurements(inputFile: string);
var
  totalBytesRead, BytesRead: int64;
  Buffer: array [0..BUFFER_SIZE] of char;
  FileStream: TFileStream;
  fileSize: int64;
  ReadBufferStream: TReadBufStream;
  starttime: uint64;
  elapsedTimeSec, MBRead: double;
  results: TFPHashList;
  idx: integer;
  startOfNextRecord: string;
  startOfNextRecordLength: integer;
  bufferLength: integer = 0;
begin
  // nil-initialise so the finally block can call Free unconditionally
  FileStream := nil;
  ReadBufferStream := nil;
  results := nil;
  try
    try
      FileStream := TFileStream.Create(inputFile, fmOpenRead);
      FileStream.Position := 0; // Ensure you are at the start of the file
      ReadBufferStream := TReadBufStream.Create(FileStream);
      fileSize := FileStream.Size;
      totalBytesRead := 0;
      starttime := GetTickCount64;
      results := TFPHashList.Create;
      startOfNextRecord := '';
      while totalBytesRead <= fileSize do
      begin
        startOfNextRecordLength := Length(startOfNextRecord);
        // the buffer only has BUFFER_SIZE - READ_SIZE spare bytes for a carry-over;
        // anything longer would make the next read overrun the buffer
        if startOfNextRecordLength > BUFFER_SIZE - READ_SIZE then
          raise Exception.Create('Record is too long for the read buffer');
        // if we have leftover from previous read then prepend it to this buffer
        if startOfNextRecordLength > 0 then
          Move(PChar(startOfNextRecord)^, Buffer[0], startOfNextRecordLength);
        BytesRead := ReadBufferStream.Read(Buffer[startOfNextRecordLength], READ_SIZE);
        if BytesRead < 1 then break;

        // walk backwards to the last LF; idx ends at -1 when the chunk has none
        bufferLength := startOfNextRecordLength + BytesRead;
        idx := bufferLength - 1;
        while (idx >= 0) and (Buffer[idx] <> LF) do
          Dec(idx);

        // idx + 1 is the number of bytes that form complete records (may be 0)
        ProcessMeasurements(Buffer, idx + 1, results);

        // keep whatever follows the last LF for the next round
        startOfNextRecord := '';
        startOfNextRecordLength := bufferLength - idx - 1;
        if startOfNextRecordLength > 0 then
          SetString(startOfNextRecord, @Buffer[idx + 1], startOfNextRecordLength);
        Inc(totalBytesRead, BytesRead);
      end;

      // A file that does not end in LF leaves its final record in the carry-over.
      // Terminate it ourselves and process it instead of silently dropping it.
      startOfNextRecordLength := Length(startOfNextRecord);
      if (startOfNextRecordLength > 0) and (Pos(REC_SEP, startOfNextRecord) > 0) then
      begin
        Move(PChar(startOfNextRecord)^, Buffer[0], startOfNextRecordLength);
        Buffer[startOfNextRecordLength] := LF;
        ProcessMeasurements(Buffer, startOfNextRecordLength + 1, results);
      end;

      DumpMeasurements(results);
      elapsedTimeSec := (GetTickCount64() - starttime) / 1000;
      MBRead := (totalBytesRead / (1024 * 1024));
      {$IFDEF DEBUG}
      WriteLn(inputFile);
      WriteLn('Buffer size: ', SizeOf(Buffer));
      WriteLn('Read size: ', READ_SIZE);
      WriteLn('File size: ', FileStream.Size);
      WriteLn('Total Bytes Read: ', totalBytesRead);
      WriteLn(Format('%f MB read', [MBRead]));
      WriteLn(Format('%f secs', [elapsedTimeSec]));
      // tiny inputs can finish within one timer tick; avoid dividing by zero
      if elapsedTimeSec > 0 then
        WriteLn(Format('%f MB/s processed', [MBRead / elapsedTimeSec]));
      {$ENDIF}
    finally
      // released on success and on failure alike
      ReadBufferStream.Free;
      FileStream.Free;
      results.Free;
    end;
  except
    on E: Exception do
    begin
      writeln('File ', inputFile, ' could not be read or written because: ', E.ToString);
      DumpExceptionCallStack(E);
    end;
  end;
end;
304+
305+
end.

0 commit comments

Comments
 (0)