-- NameWatcher.mesa
-- John Larson, July 13, 1987 5:16:49 pm PDT
DIRECTORY
Basics USING [BytePair, HighByte, LongNumber, LowByte],
BasicTime USING [GetClockPulses, GMT, Now, Pulses, PulsesToMicroseconds],
DFUtilities USING [DateToRope],
GVSend USING [AddRecipient, AddToItem, CheckValidity, Create, Handle, Send, StartSend, StartSendInfo, StartText],
IO USING [Flush, PutF, PutFR, PutRope, STREAM, time, Value],
IPNameUdp USING [Class, DomainHeader, Type],
Process USING [Detach, SecondsToTicks],
Rope USING [Cat, Concat, Equal, Fetch, Find, FromChar, Length, ROPE, Substr],
IPDefs USING [Byte, DataBuffer, Datagram, DatagramRec, DByte, Address, InternetHeader],
IPName USING [AddressToName, AddressToRope, LoadCacheFromName, NameToAddress],
IPRouter USING [BestAddress],
UDP USING [BodyRec, Create, default, Destroy, domain, Handle, minLength, Receive, Send],
UserCredentials USING [Get];
NameWatcher: CEDAR MONITOR
IMPORTS
Basics, BasicTime, DFUtilities, GVSend, IO, Process, Rope, IPName, IPRouter, UDP, UserCredentials =
BEGIN
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;

-- Time at which this module was started; printed as "Started:" by PrintStats.
started: BasicTime.GMT ← BasicTime.Now[];

-- Credentials of the logged-in user, passed to GVSend.StartSend by SendTheMessage.
senderPwd: ROPE = UserCredentials.Get[].password;
sender: ROPE = UserCredentials.Get[].name;

-- Mail recipient for failure / recovery messages.
recipient: ROPE ← "ArpanetSupport^.pa";

-- Log stream; NIL (the initial value) turns logging off.
log: STREAM ← NIL;
-- NOTE(review): the next two lines look like interpreter commands for attaching a log viewer
-- and for dumping statistics; they are kept as comments because they are not declarations.
-- ← NameWatcher.log ← ViewerIO.CreateViewerStreams["NameWatcher"].out;
-- ← NameWatcher.PrintServerStats[ViewerIO.CreateViewerStreams["Stats"].out, "*"]

interval: CARDINAL ← 1*10*60; -- interval between queries, in seconds (10 mins)
abort: BOOLEAN ← FALSE; -- TRUE => Background exits after its current wait
debug: BOOLEAN ← FALSE; -- TRUE => CheckDomainPacket logs reply header details
stats: BOOLEAN ← FALSE; -- TRUE => CheckNameServer prints the histogram after each query
-- Describes one name server being watched and the state kept for it between checks.
TestRecord: TYPE = RECORD[
server: ROPE, -- name of the server the query is sent to
query: ROPE, -- name that is looked up
expectAddr: IPDefs.Address, -- address the reply must contain
failed: BOOLEAN, -- result of the most recent Query for this server
sendMessage: BOOLEAN, -- TRUE => CheckNameServer sends mail when "failed" changes
timeOut: INT, -- in milliSecs
histogram: Histogram]; -- response-time statistics for this server
-- The servers checked on every pass of CheckNameServer.
-- Only entries with sendMessage: TRUE cause mail to be sent; every entry is logged and histogrammed.
tests: LIST OF TestRecord ← LIST[
[
server: "parcvax.Xerox.COM",
query: "Xerox.COM",
expectAddr: [10,2,0,32],
failed: FALSE,
sendMessage: TRUE,
timeOut: 60000,
histogram: NEW[HistogramRecord]],
[
server: "sonora.dec.com",
query: "Xerox.COM",
expectAddr: [10,2,0,32],
failed: FALSE,
sendMessage: FALSE,
timeOut: 60000,
histogram: NEW[HistogramRecord]]
];
-- Incremented by Query; the new value is used as the id of the outgoing domain query.
sequenceNumber: CARDINAL ← 0;

histogramSlots: INT = 300; -- number of response-time buckets
histogramSlotSize: INT = 100; -- ms/slot

Histogram: TYPE = REF HistogramRecord;
HistogramRecord: TYPE = RECORD [
  probes: INT ← 0, -- queries sent (each retry counts)
  lost: INT ← 0, -- queries for which no reply arrived before the timeout
  counters: ARRAY [0..histogramSlots) OF INT ← ALL[0] ]; -- replies per response-time slot
Query: PROC [server: ROPE, query: ROPE, expectAddr: IPDefs.Address, timeOut: INT, histogram: Histogram] RETURNS[failed: BOOLEAN ← FALSE, how: ROPE ← NIL] = TRUSTED {
  -- Sends an A / IN domain query for "query" to "server" over UDP and checks the reply.
  -- The query is sent at most twice (one retry); each send waits up to timeOut ms for a reply.
  -- Returns failed = TRUE when the server's address cannot be found, when CheckDomainPacket
  -- rejects the reply ("how" then says why), or when no reply arrives ("how" is NIL).
  -- Returns failed = FALSE, with an explanation in "how", when the UDP handle cannot be created
  -- or the reply comes from an unexpected source address.
  -- Updates histogram.probes / histogram.lost and the response-time counters.
  packetStart: BasicTime.Pulses;
  type: IPNameUdp.Type ← a;
  id: CARDINAL ← (sequenceNumber ← sequenceNumber + 1);
  handle: UDP.Handle;
  badReply: BOOLEAN ← FALSE;
  gotAddress: BOOLEAN ← FALSE;
  gotResponse: BOOLEAN ← FALSE;
  where: IPDefs.Address;
  Report[log, "\nQuerying ", server];
  [gotAddress, where] ← FindPath[server];
  IF ~gotAddress THEN {Report[log, "Can't load address for server."]; RETURN[TRUE, "Can't load address for server."]};
  handle ← UDP.Create[him: where, local: UDP.default, remote: UDP.domain];
  IF handle = NIL THEN {
    Report[log, "Can't create UDP handle.\n"]; RETURN[FALSE, "Can't create UDP handle.\n"]; };
  FOR i: CARDINAL IN [0..2) DO -- 1 retry
    packetStop: BasicTime.Pulses;
    milliseconds: LONG CARDINAL;
    dg: IPDefs.Datagram ← NEW [IPDefs.DatagramRec];
    udp: LONG POINTER TO UDP.BodyRec ← LOOPHOLE[@dg.data];
    domain: LONG POINTER TO IPNameUdp.DomainHeader ← LOOPHOLE[@udp.data];
    domain^ ← [id: id];
    -- udp.length doubles as the append position used by AppendQuery and its helpers.
    udp.length ← UDP.minLength + IPNameUdp.DomainHeader.SIZE*2;
    AppendQuery[udp, domain, query, type, in];
    UDP.Send[handle, dg, udp.length];
    packetStart ← BasicTime.GetClockPulses[];
    histogram.probes ← histogram.probes + 1;
    dg ← UDP.Receive[handle, timeOut]; -- timeout in milliSeconds
    packetStop ← BasicTime.GetClockPulses[];
    IF dg # NIL THEN {
      milliseconds ← BasicTime.PulsesToMicroseconds[packetStop-packetStart]/1000;
      udp ← LOOPHOLE[@dg.data];
      domain ← LOOPHOLE[@udp.data];
      IF dg.inHdr.source # where THEN {
        IF log ~= NIL THEN IO.PutF[log, "Strange source address: expected %G, found %G = %G: \n",
          [rope[IPName.AddressToRope[where]]],
          [rope[IPName.AddressToRope[dg.inHdr.source]]],
          [rope[IPName.AddressToName[dg.inHdr.source]]]];
        UDP.Destroy[handle];
        RETURN[FALSE, Rope.Cat["Strange source address: ", IPName.AddressToRope[dg.inHdr.source], " = ", IPName.AddressToName[dg.inHdr.source]]]};
      [badReply, how] ← CheckDomainPacket[query, expectAddr, udp, domain];
      IF badReply THEN {
        IF log ~= NIL THEN {
          Report[log, "Incorrect Reply: ", how, "\n"];
          IO.PutF[log, "Response time was %G ms.\n", [integer[milliseconds]]]};
        UDP.Destroy[handle]; RETURN[TRUE, how]}
      ELSE Report[log, "Reply: ok, "];
      IF log ~= NIL THEN IO.PutF[log, "The response time was %G ms.\n", [integer[milliseconds]]];
      AddToCounter[histogram, milliseconds];
      dg ← NIL;
      gotResponse ← TRUE;
      EXIT;
      }
    ELSE histogram.lost ← histogram.lost + 1;
    ENDLOOP;
  -- Every path that reaches here still owns the handle, so release it exactly once.
  -- (Previously the "No response." path returned without destroying it.)
  UDP.Destroy[handle];
  IF ~gotResponse THEN {Report[log, "No response.", "\n"]; RETURN[TRUE]};
  Report[log, "\n"];
  RETURN[FALSE] };
AddToCounter: PROC [histogram: Histogram, ms: INT] = {
  -- Counts one reply that took "ms" milliseconds.
  -- The time is rounded up to a whole slot, so slot k (k > 0) collects replies that took more
  -- than (k-1)*histogramSlotSize and at most k*histogramSlotSize ms; anything beyond the
  -- table lands in the last slot.
  slot: INT = MIN[(ms + histogramSlotSize - 1)/histogramSlotSize, histogramSlots-1];
  histogram.counters[slot] ← histogram.counters[slot] + 1; };
PrintStats: PROC [log: IO.STREAM, histogram: Histogram] = {
  -- Writes the statistics held in "histogram" to "log": start / end times, number of queries,
  -- number and percentage of unanswered queries, and one line per non-empty response-time slot
  -- (count, percent of queries, cumulative percent, slot upper bound in ms).
  -- Does nothing when log is NIL.
  running: INT ← 0; -- cumulative reply count over the slots printed so far
  now: BasicTime.GMT ← BasicTime.Now[];
  nowRope: ROPE ← DFUtilities.DateToRope[[format: explicit, gmt: now]];
  startedRope: ROPE ← DFUtilities.DateToRope[[format: explicit, gmt: started]];
  -- Report tolerates a NIL stream but the IO calls below are made directly on log,
  -- so guard here the way the rest of this module guards its IO.PutF calls.
  IF log = NIL THEN RETURN;
  Report[log, "Started: ", startedRope, "\n"];
  Report[log, "Ended: ", nowRope, "\n"];
  IO.PutF[log, " Queries: %G.\n", [integer[histogram.probes]] ];
  IF histogram.probes = 0 THEN RETURN; -- avoid dividing by zero below
  IO.PutF[log, " No reply: %G, %1.2F%%.\n", [integer[histogram.lost]], [real[100.0*histogram.lost/histogram.probes]] ];
  IO.PutRope[log, " Response time histogram:\n"];
  FOR i: INT IN [0..histogramSlots) DO
    counter: INT ← histogram.counters[i];
    milliseconds: INT ← i*histogramSlotSize;
    counterPerCent, runningPerCent: REAL;
    IF counter = 0 THEN LOOP; -- empty slots are not printed
    running ← running + counter;
    counterPerCent ← 100.0*counter/histogram.probes;
    runningPerCent ← 100.0*running/histogram.probes;
    IO.PutF[log, "%7G %7.2F %7.2F %7G\n", [integer[counter]], [real[counterPerCent]], [real[runningPerCent]], [integer[milliseconds]] ];
    ENDLOOP;
  IO.PutRope[log, "\n"];
  IO.Flush[log]; };
PrintServerStats: PROC [log: IO.STREAM, server: Rope.ROPE] = {
  -- Prints the statistics of every entry in "tests" whose server name matches "server"
  -- (case is ignored); "*" selects all entries.
  everyServer: BOOLEAN = Rope.Equal[server, "*", FALSE];
  FOR entry: LIST OF TestRecord ← tests, entry.rest UNTIL entry = NIL DO
    name: ROPE = entry.first.server;
    IF ~(everyServer OR Rope.Equal[name, server, FALSE]) THEN LOOP;
    Report[log, "Server: ", name, "\n"];
    PrintStats[log, entry.first.histogram];
    Report[log, "\n"];
    IO.Flush[log];
    ENDLOOP;
  };
FindPath: PROC [target: ROPE] RETURNS [ok: BOOLEAN ← FALSE, where: IPDefs.Address] = {
  -- Resolves "target" to the address to use for it.
  -- Returns ok = FALSE (and no meaningful "where") when the name cache cannot be loaded or
  -- the name has no addresses; otherwise ok = TRUE and "where" is the address picked by
  -- IPRouter.BestAddress. The outcome is written to the global log when it is non-NIL.
  whereList: LIST OF IPDefs.Address;
  IF IPName.LoadCacheFromName[target, TRUE, TRUE] = down THEN {
    IF log # NIL THEN IO.PutF[log, "Can't load name Cache."]; RETURN; };
  whereList ← IPName.NameToAddress[target];
  IF whereList = NIL THEN {IF log # NIL THEN IO.PutF[log, "Name not found."]; RETURN; };
  where ← IPRouter.BestAddress[whereList];
  Report[log, " = ", IPName.AddressToRope[where], ".\n"];
  RETURN[TRUE, where]; };
CheckDomainPacket: PROC [query: ROPE, expectAddr: IPDefs.Address,
  udp: LONG POINTER TO UDP.BodyRec, domain: LONG POINTER TO IPNameUdp.DomainHeader] RETURNS[failed: BOOLEAN ← FALSE, how: ROPE ← NIL] = TRUSTED {
  -- Validates a reply to the query built by AppendQuery.
  -- The reply must be a non-truncated response with opcode query and rcode ok, carry exactly
  -- one question (type a, class in) and exactly one answer whose name equals "query"
  -- (case ignored), type a, class in, a 4-byte address equal to expectAddr, and no
  -- name-server or additional records.
  -- Returns failed = TRUE with a short reason in "how" on the first violation; falls off the
  -- end with the defaults (FALSE, NIL) when everything checks out.
  -- Side effect: udp.length is overwritten and used as the read position by the Get* helpers.
  -- NOTE(review): reads are not checked against the received length, and the reply id is
  -- not compared with the id that was sent.
  length: INT ← udp.length; -- received length, only used for the debug log
  type: IPNameUdp.Type;
  class: IPNameUdp.Class;
  ttl: INT;
  rDataLength: CARDINAL;
  name: ROPE;
  -- Position the read cursor just past the domain header.
  udp.length ← UDP.minLength + IPNameUdp.DomainHeader.SIZE*2;
  SELECT domain.qr FROM
    response => IF debug THEN Report[log, "qr: response, "];
    ENDCASE => RETURN[TRUE, "qr # response."];
  SELECT domain.opcode FROM
    query => IF debug THEN Report[log, "op: query"];
    ENDCASE => RETURN[TRUE, "op # query."];
  IF debug AND log ~= NIL THEN IO.PutF[log, ", length: %G bytes.\n", [integer[length]]];
  IF debug AND log ~= NIL THEN IO.PutF[log, "aa: %G, tc: %G, rd: %G, ra: %G\n",
    [boolean[domain.aa]], [boolean[domain.tc]], [boolean[domain.rd]], [boolean[domain.ra]]];
  SELECT domain.rcode FROM
    ok => IF debug THEN Report[log, "rcode: ok"]; -- label used to say "op", copied from the opcode check
    format => RETURN[TRUE, "Format failure."];
    serverFailed => RETURN[TRUE, "Server failed."];
    nameNotFound => RETURN[TRUE, "nameNotFound."];
    notImplemented => RETURN[TRUE, "notImplemented."];
    refused => RETURN[TRUE, "refused."];
    ENDCASE => RETURN[TRUE, "unknown."];
  IF domain.tc THEN {Report[log, " ** TRUNCATED **"]; RETURN[TRUE, "Truncated."];};
  IF debug THEN Report[log, "\n"];
  IF debug AND log ~= NIL THEN IO.PutF[log, "qdCount: %G, anCount: %G, nsCount: %G, arCount: %G\n",
    [integer[domain.qdCount]], [integer[domain.anCount]], [integer[domain.nsCount]], [integer[domain.arCount]]];
  -- Question section.
  IF domain.qdCount # 1 THEN RETURN[TRUE, "qdCount # 1"];
  name ← GetName[udp];
  IF debug AND log ~= NIL THEN IO.PutF[log, "Query Name: \"%G\", ", [rope[name]]];
  type ← GetTwoBytes[udp];
  IF type # a THEN RETURN[TRUE, "Incorrect query type."];
  class ← GetTwoBytes[udp];
  IF class # in THEN RETURN[TRUE, "Incorrect query class."];
  -- Answer section.
  IF domain.anCount # 1 THEN RETURN[TRUE, "anCount # 1"];
  name ← GetName[udp];
  IF ~Rope.Equal[name, query, FALSE] THEN RETURN[TRUE, "Incorrect response name."];
  type ← GetTwoBytes[udp];
  IF type # a THEN RETURN[TRUE, "Incorrect response type."];
  class ← GetTwoBytes[udp];
  IF class # in THEN RETURN[TRUE, "Incorrect response class."];
  ttl ← GetTtl[udp]; -- value is not checked; the call advances the read cursor
  rDataLength ← GetTwoBytes[udp];
  SELECT type FROM
    a => { -- This is it!
      IF rDataLength = 4 THEN {
        addr: IPDefs.Address ← GetIPAddress[udp];
        IF addr # expectAddr THEN RETURN[TRUE, "Incorrect address."];}
      ELSE RETURN[TRUE, "Funny Length for address response."]; };
    ENDCASE => { -- Something bogus
      RETURN[TRUE, "Bogus RR."]; };
  IF domain.nsCount # 0 THEN RETURN[TRUE, "nsCount # 0"];
  IF domain.arCount # 0 THEN RETURN[TRUE, "arCount # 0"];
  };
GetName: PROC [udp: LONG POINTER TO UDP.BodyRec] RETURNS [rope: ROPE] = TRUSTED {
  -- Reads a domain name starting at position udp.length and returns it with its labels
  -- joined by ".". On return udp.length is the position just past the name as it appears
  -- at the starting position (a followed link counts as its two bytes).
  linkTag: INT = 300B; -- count bytes >= this value mark an indirect link
  cursor: INT ← udp.length;
  rope ← NIL;
  DO
    count: INT ← udp.data[cursor];
    cursor ← cursor + 1;
    IF count = 0 THEN EXIT; -- zero-length label ends the name
    IF count >= linkTag THEN { -- Indirect link: rest of the name lives elsewhere
      target: INT ← (count-linkTag)*256 + udp.data[cursor] + UDP.minLength;
      cursor ← cursor + 1;
      -- Let the recursive call read from the link target; udp.length is put back
      -- to our own cursor after the loop.
      udp.length ← target;
      rope ← Rope.Concat[rope, GetName[udp]];
      EXIT; };
    FOR k: INT IN [0..count) DO
      rope ← Rope.Concat[rope, Rope.FromChar[LOOPHOLE[udp.data[cursor]]]];
      cursor ← cursor + 1;
      ENDLOOP;
    -- Another label (or a link) follows: separate it with a dot.
    IF udp.data[cursor] # 0 THEN rope ← Rope.Concat[rope, "."];
    ENDLOOP;
  udp.length ← cursor; };
longTime: LONG CARDINAL = INT.LAST; -- largest time-to-live GetTtl will return
GetTtl: PUBLIC PROC [
  udp: LONG POINTER TO UDP.BodyRec] RETURNS [INT] = {
  -- Reads the next four bytes as an unsigned number (advancing udp.length via GetCard)
  -- and returns it, limited to longTime so that it fits in an INT.
  ttl: LONG CARDINAL = MIN[GetCard[udp], longTime];
  RETURN[ttl]; };
GetCard: PROC [
  udp: LONG POINTER TO UDP.BodyRec] RETURNS [LONG CARDINAL] = TRUSTED {
  -- Reads two consecutive 16-bit values at the read position and combines them,
  -- first one as the high half, second one as the low half.
  wide: Basics.LongNumber;
  wide.hi ← GetCardinal[udp]; -- order matters: each call advances udp.length
  wide.lo ← GetCardinal[udp];
  RETURN[wide.lc]; };
GetCardinal: PROC [
  udp: LONG POINTER TO UDP.BodyRec] RETURNS [CARDINAL] = TRUSTED {
  -- GetTwoBytes, with the result typed as a CARDINAL.
  word: CARDINAL ← GetTwoBytes[udp];
  RETURN[word]; };
GetTwoBytes: PROC [
  udp: LONG POINTER TO UDP.BodyRec] RETURNS [UNSPECIFIED] = TRUSTED {
  -- Returns the two bytes at the read position udp.length (first byte is the high byte)
  -- and advances udp.length by 2.
  at: INT = udp.length;
  pair: Basics.BytePair;
  pair.high ← udp.data[at];
  pair.low ← udp.data[at+1];
  udp.length ← at + 2;
  RETURN[pair]; };
GetIPAddress: PROC [
  udp: LONG POINTER TO UDP.BodyRec] RETURNS [a: IPDefs.Address] = TRUSTED {
  -- Returns the four bytes at the read position udp.length as an address
  -- and advances udp.length by 4.
  at: INT = udp.length;
  a ← [udp.data[at], udp.data[at+1], udp.data[at+2], udp.data[at+3]];
  udp.length ← at + 4; };
AppendQuery: PROC [
  udp: LONG POINTER TO UDP.BodyRec, domain: LONG POINTER TO IPNameUdp.DomainHeader,
  query: ROPE, type: IPNameUdp.Type, class: IPNameUdp.Class] = TRUSTED {
  -- Adds one question (name, type, class) at the append position udp.length
  -- and counts it in the header's qdCount.
  domain.qdCount ← domain.qdCount + 1;
  AppendName[udp, query];
  AppendTwoBytes[udp, type];
  AppendTwoBytes[udp, class]; };
AppendName: PROC [
  udp: LONG POINTER TO UDP.BodyRec, name: ROPE] = TRUSTED {
  -- Appends "name" as a sequence of fragments, one per dot-separated label,
  -- followed by an empty fragment that ends the name.
  rest: ROPE ← name;
  DO
    cut: INT = Rope.Find[rest, "."];
    -- -1: no more dots. 0: the next label would be empty; stop splitting
    -- (the original notes a bounds fault here) and let the remainder go out as one fragment.
    IF cut <= 0 THEN EXIT;
    AppendFragment[udp, Rope.Substr[rest, 0, cut]];
    rest ← Rope.Substr[rest, cut+1];
    ENDLOOP;
  IF Rope.Length[rest] # 0 THEN AppendFragment[udp, rest];
  AppendFragment[udp, NIL]; };
AppendFragment: PROC [
  udp: LONG POINTER TO UDP.BodyRec, rope: ROPE] = TRUSTED {
  -- Writes a count byte holding the length of "rope" followed by its characters at the
  -- append position udp.length, then advances udp.length past them.
  -- A NIL rope produces just a zero count byte.
  base: INT = udp.length;
  n: INT = Rope.Length[rope];
  udp.data[base] ← n;
  FOR k: INT IN [0..n) DO
    udp.data[base+k+1] ← LOOPHOLE[Rope.Fetch[rope, k]];
    ENDLOOP;
  udp.length ← base + n + 1; };
AppendTwoBytes: PROC [
  udp: LONG POINTER TO UDP.BodyRec, data: UNSPECIFIED] = TRUSTED {
  -- Writes "data" as two bytes, high byte first, at the append position udp.length
  -- and advances udp.length by 2.
  at: INT = udp.length;
  udp.data[at] ← Basics.HighByte[data];
  udp.data[at+1] ← Basics.LowByte[data];
  udp.length ← at + 2; };
Report: PROC [log: IO.STREAM, r1, r2, r3, r4: ROPE ← NIL] = TRUSTED {
  -- Writes the non-NIL ropes among r1..r4, in order, to "log".
  -- Does nothing when log is NIL, so callers need not test the stream themselves.
  IF log = NIL THEN RETURN;
  IF r1 # NIL THEN {IO.PutRope[log, r1]};
  IF r2 # NIL THEN {IO.PutRope[log, r2]};
  IF r3 # NIL THEN {IO.PutRope[log, r3]};
  IF r4 # NIL THEN {IO.PutRope[log, r4]}; };
SendTheMessage: PROC [server: ROPE, justFailed: BOOLEAN, how: ROPE ← NIL] = {
  -- Mails a status message about "server" to "recipient" through GVSend.
  --   justFailed = FALSE: the server is back up.
  --   justFailed = TRUE, how # NIL: the server gave a bad answer; "how" is included as the problem.
  --   justFailed = TRUE, how = NIL: the server did not respond; a generic hint is included.
  -- Returns without sending when GVSend.StartSend does not answer ok;
  -- raises ERROR when CheckValidity does not report exactly one valid recipient.
  msg: ROPE;
  handle: GVSend.Handle;
  ssi: GVSend.StartSendInfo;
  -- Header lines.
  msg ← Rope.Cat[msg, "Date: ", IO.PutFR["%G", IO.time[]], "\n"];
  msg ← Rope.Cat[msg, "From: Mailer.pa (ArpaGateway NameWatcher)\n"];
  msg ← Rope.Cat[msg, "Subject: ", server];
  msg ← Rope.Cat[msg,
    SELECT TRUE FROM
      ~justFailed => " name server back up.\n",
      how # NIL => " name server problem.\n",
      ENDCASE => " name server not responding.\n"
    ];
  msg ← Rope.Cat[msg, "To: ", recipient, "\n"];
  msg ← Rope.Cat[msg, "\n"]; -- blank line between header and body
  -- Body.
  IF how ~= NIL THEN msg ← Rope.Cat[msg, "Problem: ", how, "\n"]
  ELSE {IF justFailed THEN msg ← Rope.Cat[msg, "Problem could be with the ArpaGateway, Gandalf (Alto connection to IMP), the IMP, or the name server running on Vaxc.\n\nTo restart the name server, kill off the old one and type \"/etc/named\"\n"]};
  handle ← GVSend.Create[];
  ssi ← GVSend.StartSend[
    handle: handle,
    senderPwd: senderPwd,
    sender: sender,
    returnTo: NIL,
    validate: TRUE ];
  IF ssi ~= ok THEN RETURN;
  GVSend.AddRecipient[handle, recipient];
  IF GVSend.CheckValidity[handle, NIL] # 1 THEN ERROR;
  GVSend.StartText[handle];
  GVSend.AddToItem[handle, msg];
  GVSend.Send[handle];
  };
CheckNameServer: PROC = {
  -- Runs one Query for every entry in "tests" and records the outcome in the entry.
  -- For entries with sendMessage set, mail is sent only when the state changes:
  -- a failure message on ok -> failed, an "up" message on failed -> ok.
  -- When "stats" is TRUE and a log is attached, the entry's histogram is printed after its query.
  how: ROPE ← NIL;
  FOR list: LIST OF TestRecord ← tests, list.rest UNTIL list = NIL DO
    server: ROPE ← list.first.server;
    wasFailed: BOOLEAN ← list.first.failed; -- state before this query, to detect transitions
    IF server = NIL THEN EXIT;
    [list.first.failed, how] ← Query[server, list.first.query, list.first.expectAddr, list.first.timeOut, list.first.histogram];
    IF list.first.sendMessage THEN {
      IF list.first.failed AND ~wasFailed THEN {
        IF log # NIL THEN Report[log, "Sending failure message to: ", recipient, "\n"];
        SendTheMessage[server, TRUE, how]}
      ELSE IF wasFailed AND ~list.first.failed THEN {
        IF log # NIL THEN Report[log, "Sending up message to: ", recipient, "\n"];
        SendTheMessage[server, FALSE]}};
    IF log ~= NIL AND stats THEN {PrintStats[log, list.first.histogram]; IO.Flush[log]};
    ENDLOOP;
  };
-- Watcher loop: wait "interval" seconds, then run CheckNameServer; repeat until "abort" is set.
-- "abort" is examined only after each wait, so setting it takes effect at the end of the current wait.
-- "interval" is re-read on every pass because the condition variable is rebuilt each time.
-- NOTE(review): nothing in this file notifies snooz, so the WAIT presumably ends only by timeout.
-- NOTE(review): as an ENTRY procedure this runs CheckNameServer with the monitor lock held;
-- no other ENTRY procedure is visible here, so nothing appears to contend for it.
Background: ENTRY PROC = {
DO
snooz: CONDITION ← [timeout: Process.SecondsToTicks[interval]];
WAIT snooz;
IF abort THEN {Report[log, "\n", "Aborted.", "\n"]; EXIT};
CheckNameServer[];
ENDLOOP; };
-- Module start code: run Background in its own detached process.
TRUSTED {Process.Detach[FORK Background[]]; };
END.