DIRECTORY
Arpa USING [nullAddress, Address],
ArpaName USING [AddressToName, NameToAddress, ReplyStatus],
ArpaNameSupport USING [AddressInList],
ArpaNameQuery,
BasicTime USING [GMT, Now],
ConvertExtras USING [RopeFromArpaAddress],
DFUtilities USING [DateToRope],
GVSend USING [AddRecipient, AddToItem, CheckValidity, Create, Handle, Send, StartSend, StartSendInfo, StartText],
IO USING [Flush, PutF, PutFR, PutRope, STREAM, time, Value],
Process USING [Detach, SecondsToTicks],
Rope USING [Cat, Equal, ROPE],
UserCredentials
USING [Get];
ArpaNameServerWatcher:
CEDAR
MONITOR
IMPORTS
ArpaName, ArpaNameSupport, ArpaNameQuery, BasicTime, ConvertExtras, DFUtilities, GVSend, IO, Process, Rope, UserCredentials =
BEGIN
-- Short local names for the rope and stream types used throughout this module.
ROPE: TYPE = Rope.ROPE;
STREAM: TYPE = IO.STREAM;
-- Time captured when this module is initialized; PrintStats prints it as "Started:".
started: BasicTime.GMT ← BasicTime.Now[];
-- Password and name obtained from UserCredentials.Get; SendTheMessage passes them to GVSend.StartSend.
senderPwd: ROPE = UserCredentials.Get[].password;
sender: ROPE = UserCredentials.Get[].name;
-- Mail recipient used by SendTheMessage for both failure and recovery notices.
recipient: ROPE ← "ArpaSupport^.pa";
-- Log stream. While it is NIL, Report returns without writing and the IO.PutF calls guarded by log ~= NIL are skipped.
log: STREAM ← NIL;
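-- Interpreter commands for attaching a log viewer and for printing the statistics of every server:
-- ← ArpaNameServerWatcher.log ← ViewerIO.CreateViewerStreams["ArpaNameServerWatcher"].out;
-- ← ArpaNameServerWatcher.PrintServerStats[ViewerIO.CreateViewerStreams["Stats"].out, "*"]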
-- 1*10*90 = 900, which is 15 minutes if the unit is seconds (the code that consumes interval is outside this view; TODO confirm the unit).
interval: CARDINAL ← 1*10*90; -- interval between queries (15 mins)
-- Not referenced in the visible part of the file; presumably a stop request for the polling loop (TODO confirm).
abort: BOOLEAN ← FALSE;
-- When TRUE, CheckDomainPacket logs the header fields of each reply it inspects.
debug: BOOLEAN ← FALSE;
-- When TRUE (and log is not NIL), CheckNameServer prints the histogram of each server after probing it.
stats: BOOLEAN ← FALSE;
-- One probe description per name server, plus the mutable state the watcher keeps for that server.
TestRecord:
TYPE =
RECORD[
server: ROPE, -- host name of the name server to probe; resolved with ArpaName.NameToAddress in Query
queryRope: ROPE, -- domain name sent as the question
expectAddrs: LIST OF Arpa.Address, -- the answer must be one of these addresses (checked via ArpaNameSupport.AddressInList)
failed: BOOLEAN, -- result of the most recent Query; CheckNameServer compares old and new values to detect transitions
sendMessage: BOOLEAN, -- TRUE if mail should be sent when failed changes
timeOut: INT, -- passed as the timeout of ArpaNameQuery.Query; presumably in milliSecs (the original note sat on the next line; TODO confirm)
histogram: Histogram]; -- probe counters and response-time histogram for this server
-- The servers probed by CheckNameServer, in order. Every entry starts as not failed and gets its own
-- freshly allocated histogram. gatekeeper.dec.com is the only entry with sendMessage FALSE, so its
-- state changes are logged but never mailed.
tests:
LIST
OF TestRecord ←
LIST[
[
server: "arisia.Xerox.COM",
queryRope: "Xerox.com",
expectAddrs: LIST[[13,0,12,232]],
failed: FALSE,
sendMessage: TRUE,
timeOut: 60000,
histogram: NEW[HistogramRecord]],
[
server: "gatekeeper.dec.com",
queryRope: "Xerox.com",
expectAddrs: LIST[[13,0,12,232]],
failed: FALSE,
sendMessage: FALSE,
timeOut: 60000,
histogram: NEW[HistogramRecord]],
[
server: "XAIT.Xerox.Com",
queryRope: "Xerox.com",
expectAddrs: LIST[[13,0,12,232]],
failed: FALSE,
sendMessage: TRUE,
timeOut: 60000,
histogram: NEW[HistogramRecord]],
-- The last two servers are asked to resolve their own names.
[
server: "palain.parc.Xerox.COM",
queryRope: "palain.parc.Xerox.COM",
expectAddrs: LIST[[13,1,100,208]],
failed: FALSE,
sendMessage: TRUE,
timeOut: 60000,
histogram: NEW[HistogramRecord]],
[
server: "pooh.parc.Xerox.COM",
queryRope: "pooh.parc.Xerox.COM",
expectAddrs: LIST[[13,2,16,167]],
failed: FALSE,
sendMessage: TRUE,
timeOut: 60000,
histogram: NEW[HistogramRecord]]
];
-- Not referenced in the visible part of the file.
sequenceNumber: CARDINAL ← 0;
-- Histogram geometry: 300 slots of 100 ms each. AddToCounter rounds a response time up to the next
-- slot boundary and puts anything beyond the last slot into the last slot.
histogramSlots: INT = 300;
histogramSlotSize: INT = 100; -- ms/slot
Histogram: TYPE = REF HistogramRecord;
-- Per-server statistics, updated by Query and AddToCounter and printed by PrintStats.
HistogramRecord:
TYPE =
RECORD [
probes: INT ← 0, -- number of queries sent, retries included
lost: INT ← 0, -- number of queries that got no reply
counters: ARRAY [0..histogramSlots) OF INT ← ALL[0] ]; -- counters[i] counts accepted replies whose time rounded up to i*histogramSlotSize ms
Query: PROC [server: ROPE, queryRope: ROPE, expectAddrs: LIST OF Arpa.Address, timeOut: INT, histogram: Histogram]
  RETURNS [failed: BOOLEAN ← FALSE, how: ROPE ← NIL] = {
  -- Probes one name server: resolves its address, sends queryRope (retrying once if there is no
  -- reply), validates the reply with CheckDomainPacket and records the response time.
  -- failed is TRUE when both attempts got no reply or when the reply was rejected; how carries
  -- the reason whenever one is known. If the server name itself cannot be resolved, or the
  -- reply arrives from an unexpected source address, the server is NOT reported as failed.
  rejected: BOOLEAN ← FALSE;
  gotResponse: BOOLEAN ← FALSE; -- never read in this procedure
  serverAddr: Arpa.Address;
  answer: ArpaNameQuery.Reply;
  lookup: ArpaName.ReplyStatus;

  Report[log, "\nQuerying ", server];
  [serverAddr, lookup,] ← ArpaName.NameToAddress[server];
  -- Without an address there is nothing to probe; return the default results.
  IF lookup = down THEN {
    Report[log, "Can't load name Cache."];
    RETURN};
  IF lookup # ok OR serverAddr = Arpa.nullAddress THEN {
    Report[log, "Can't load address for server."];
    RETURN};
  Report[log, " = ", ConvertExtras.RopeFromArpaAddress[serverAddr], ".\n"];

  -- First attempt.
  answer ← ArpaNameQuery.Query[server: serverAddr, query: queryRope, timeout: timeOut];
  histogram.probes ← histogram.probes + 1;
  IF answer = NIL THEN {
    -- Second and last attempt; the first one counts as lost.
    Report[log, "No response.", "\n"];
    histogram.lost ← histogram.lost + 1;
    answer ← ArpaNameQuery.Query[server: serverAddr, query: queryRope, timeout: timeOut];
    histogram.probes ← histogram.probes + 1;
    };
  IF answer = NIL THEN {
    histogram.lost ← histogram.lost + 1;
    Report[log, "No response.", "\n"];
    RETURN[TRUE]};

  -- A reply from some other host is reported but does not mark the server as failed.
  IF answer.source # serverAddr THEN {
    IF log # NIL THEN
      IO.PutF[log, "Strange source address: expected %G, found %G = %G: \n",
        [rope[ConvertExtras.RopeFromArpaAddress[serverAddr]]],
        [rope[ConvertExtras.RopeFromArpaAddress[answer.source]]],
        [rope[ArpaName.AddressToName[answer.source].name]]];
    RETURN[FALSE, Rope.Cat["Strange source address: ", ConvertExtras.RopeFromArpaAddress[answer.source], " = ", ArpaName.AddressToName[answer.source].name]]};

  [rejected, how] ← CheckDomainPacket[queryRope, expectAddrs, answer];
  IF rejected THEN {
    IF log # NIL THEN {
      Report[log, "Incorrect Reply: ", how, "\n"];
      IO.PutF[log, "Response time was %G ms.\n", [integer[answer.responseTime]]]};
    RETURN[TRUE, how]};

  -- Only accepted replies contribute to the response-time histogram.
  Report[log, "Reply: ok, "];
  IF log # NIL THEN
    IO.PutF[log, "The response time was %G ms.\n", [integer[answer.responseTime]]];
  AddToCounter[histogram, answer.responseTime];
  Report[log, "\n"];
  RETURN[FALSE] };
AddToCounter: PROC [histogram: Histogram, ms: INT] = {
  -- Counts one response of ms milliseconds in histogram.counters.
  -- A time is rounded up to the next multiple of histogramSlotSize, so slot i holds times in
  -- ((i-1)*histogramSlotSize .. i*histogramSlotSize]. Times beyond the last slot go into the
  -- last slot and times of zero or less go into slot 0.
  -- Both ends are clamped BEFORE doing arithmetic: the old code could compute a negative index
  -- for a sufficiently negative ms (an out-of-range array access), and could overflow
  -- ms + histogramSlotSize - 1 for a very large ms.
  index: INT;
  IF ms <= 0 THEN index ← 0
  ELSE IF ms > (histogramSlots-1)*histogramSlotSize THEN index ← histogramSlots-1
  ELSE index ← (ms + histogramSlotSize - 1)/histogramSlotSize;
  histogram.counters[index] ← histogram.counters[index] + 1; };
PrintStats: PROC [log: IO.STREAM, histogram: Histogram] = {
  -- Writes a statistics report for one histogram to log: the start and current times, the number
  -- of queries, the share of queries that got no reply, and one line per non-empty histogram slot
  -- giving count, percentage, cumulative percentage and the slot's time in milliseconds.
  -- All percentages are relative to histogram.probes. Nothing beyond the query count is printed
  -- when no queries were made, which also avoids dividing by zero.
  running: INT ← 0; -- cumulative count of the slots printed so far
  now: BasicTime.GMT ← BasicTime.Now[];
  nowRope: ROPE ← DFUtilities.DateToRope[[format: explicit, gmt: now]];
  startedRope: ROPE ← DFUtilities.DateToRope[[format: explicit, gmt: started]];
  -- Report ignores a NIL log; do the same here so the direct IO calls below are never handed NIL
  -- (PrintServerStats calls this procedure without checking the stream).
  IF log = NIL THEN RETURN;
  Report[log, "Started: ", startedRope, "\n"];
  Report[log, "Ended: ", nowRope, "\n"];
  IO.PutF[log, " Queries: %G.\n", [integer[histogram.probes]] ];
  IF histogram.probes = 0 THEN RETURN;
  IO.PutF[log, " No reply: %G, %1.2F%%.\n", [integer[histogram.lost]], [real[100.0*histogram.lost/histogram.probes]] ];
  IO.PutRope[log, " Response time histogram:\n"];
  FOR i: INT IN [0..histogramSlots) DO
    counter: INT ← histogram.counters[i];
    milliseconds: INT ← i*histogramSlotSize;
    counterPerCent, runningPerCent: REAL;
    IF counter = 0 THEN LOOP; -- empty slots are not printed
    running ← running + counter;
    counterPerCent ← 100.0*counter/histogram.probes;
    runningPerCent ← 100.0*running/histogram.probes;
    IO.PutF[log, "%7G %7.2F %7.2F %7G\n", [integer[counter]], [real[counterPerCent]], [real[runningPerCent]], [integer[milliseconds]] ];
    ENDLOOP; -- this terminator was missing, which left the loop unterminated
  IO.PutRope[log, "\n"];
  IO.Flush[log]; };
PrintServerStats: PROC [log: IO.STREAM, server: Rope.ROPE] = {
  -- Prints the statistics of every entry of tests whose server name equals server (letter case
  -- is ignored). Passing "*" selects all entries.
  everyServer: BOOLEAN = Rope.Equal[server, "*", FALSE];
  FOR entry: LIST OF TestRecord ← tests, entry.rest WHILE entry # NIL DO
    name: ROPE = entry.first.server;
    IF ~(everyServer OR Rope.Equal[name, server, FALSE]) THEN LOOP;
    Report[log, "Server: ", name, "\n"];
    PrintStats[log, entry.first.histogram];
    Report[log, "\n"];
    IO.Flush[log];
    ENDLOOP;
  };
CheckDomainPacket: PROC [queryRope: ROPE, expectAddrs: LIST OF Arpa.Address, reply: ArpaNameQuery.Reply]
  RETURNS [failed: BOOLEAN ← FALSE, how: ROPE ← NIL] = TRUSTED {
  -- Validates a name server reply. The reply is accepted only if it is a response to a standard
  -- query, has response code ok, is not truncated, echoes exactly one question of type a and
  -- class in, and carries exactly one answer: an address record for queryRope whose address is
  -- in expectAddrs, with no name server or additional records.
  -- Returns failed = TRUE and a short reason in how at the first check that does not hold;
  -- falls off the end with the defaults (FALSE, NIL) when every check passes.
  -- With debug set, the header fields are logged as they are examined.

  -- Header: direction and opcode.
  IF reply.hdr.qr # response THEN RETURN[TRUE, "qr # response."];
  IF debug THEN Report[log, "qr: response, "];
  IF reply.hdr.opcode # query THEN RETURN[TRUE, "op # query."];
  IF debug THEN Report[log, "op: query"];
  IF debug AND log # NIL THEN {
    IO.PutF[log, ", length: %G bytes.\n", [integer[reply.domainPacketLength]]];
    IO.PutF[log, "aa: %G, tc: %G, rd: %G, ra: %G\n",
      [boolean[reply.hdr.authoritative]], [boolean[reply.hdr.truncated]], [boolean[reply.hdr.recurDesired]], [boolean[reply.hdr.recurAvail]]];
    };

  -- Header: response code. Anything other than ok is a failure named after the code.
  SELECT reply.hdr.rcode FROM
    ok => IF debug THEN Report[log, "op: ok"];
    format => RETURN[TRUE, "Format error."];
    serverFailed => RETURN[TRUE, "serverFailed."];
    nameError => RETURN[TRUE, "nameError."];
    notImplemented => RETURN[TRUE, "notImplemented."];
    refused => RETURN[TRUE, "refused."];
    ENDCASE => RETURN[TRUE, "unknown."];
  IF reply.hdr.truncated THEN {
    Report[log, " ** TRUNCATED **"];
    RETURN[TRUE, "Truncated."]};
  IF debug THEN Report[log, "\n"];
  IF debug AND log # NIL THEN
    IO.PutF[log, "qdCount: %G, anCount: %G, nsCount: %G, arCount: %G\n",
      [integer[reply.hdr.qdCount]], [integer[reply.hdr.anCount]], [integer[reply.hdr.nsCount]], [integer[reply.hdr.arCount]]];

  -- Question section: exactly one question, of type a and class in.
  IF reply.hdr.qdCount # 1 THEN RETURN[TRUE, "qdCount # 1"];
  IF debug AND log # NIL THEN
    IO.PutF[log, "Query Name: \"%G\", ", [rope[reply.questions[0].name]]];
  IF reply.questions[0].type # a THEN RETURN[TRUE, "Incorrect query type."];
  IF reply.questions[0].class # in THEN RETURN[TRUE, "Incorrect query class."];

  -- Answer section: exactly one address record that matches the query.
  -- NOTE(review): this reads reply.anCount while every other count is read from reply.hdr;
  -- the Reply record is declared elsewhere, so confirm which field is intended.
  IF reply.anCount # 1 THEN RETURN[TRUE, "anCount # 1"];
  WITH reply.answers[0] SELECT FROM
    addressRR: ArpaNameQuery.ARR => {
      IF ~Rope.Equal[addressRR.name, queryRope, FALSE] THEN RETURN[TRUE, "Incorrect response name."];
      IF addressRR.type # a THEN RETURN[TRUE, "Incorrect response type."];
      IF addressRR.class # in THEN RETURN[TRUE, "Incorrect response class."];
      IF addressRR.dataLength # 4 THEN RETURN[TRUE, "Funny Length.\n"];
      IF ~ArpaNameSupport.AddressInList[expectAddrs, addressRR.address] THEN RETURN[TRUE, "Incorrect address."];
      };
    ENDCASE => RETURN[TRUE, ", Bogus RR.\n"];

  -- No name server or additional records are expected.
  IF reply.hdr.nsCount # 0 THEN RETURN[TRUE, "nsCount # 0"];
  IF reply.hdr.arCount # 0 THEN RETURN[TRUE, "arCount # 0"];
  };
Report: PROC [log: IO.STREAM, r1, r2, r3, r4: ROPE ← NIL] = TRUSTED {
  -- Writes up to four ropes to log, in argument order, skipping the ones that are NIL.
  -- Does nothing at all when log is NIL, so callers may pass the log stream unchecked.
  IF log # NIL THEN {
    IF r1 # NIL THEN IO.PutRope[log, r1];
    IF r2 # NIL THEN IO.PutRope[log, r2];
    IF r3 # NIL THEN IO.PutRope[log, r3];
    IF r4 # NIL THEN IO.PutRope[log, r4];
    };
  };
SendTheMessage: PROC [server: ROPE, justFailed: BOOLEAN, how: ROPE ← NIL] = {
  -- Mails a status notice about server to recipient through GVSend.
  -- justFailed = TRUE announces a failure, FALSE announces that the server is back up.
  -- how, when not NIL, is quoted as the problem; a failure without how gets a generic hint.
  -- Returns silently if GVSend.StartSend does not answer ok, and raises ERROR if
  -- GVSend.CheckValidity does not return 1 for the single recipient.
  subjectTail: ROPE = SELECT TRUE FROM
    ~justFailed => " name server back up.\n",
    how # NIL => " name server problem.\n",
    ENDCASE => " name server not responding.\n";
  text: ROPE;
  connection: GVSend.Handle;
  startStatus: GVSend.StartSendInfo;

  -- Header lines: Date, From, Subject, To, then the blank line that ends the header.
  text ← Rope.Cat["Date: ", IO.PutFR["%G", IO.time[]], "\n"];
  text ← Rope.Cat[text, "From: Mailer.pa (ArpaGateway NameWatcher)\n", "Subject: ", server];
  text ← Rope.Cat[text, subjectTail, "To: ", recipient];
  text ← Rope.Cat[text, "\n", "\n"];

  -- Message body; a recovery notice without how has an empty body.
  IF how # NIL THEN
    text ← Rope.Cat[text, "Problem: ", how, "\n"]
  ELSE IF justFailed THEN
    text ← Rope.Cat[text, "Problem could be with the ArpaGateway, a cisco router, or the name server on ", server, ".\n\nTo restart the name server; as superuser kill off the old one if it is still running and type \"/usr/etc/in.named\".\n"];

  connection ← GVSend.Create[];
  startStatus ← GVSend.StartSend[
    handle: connection,
    senderPwd: senderPwd,
    sender: sender,
    returnTo: NIL,
    validate: TRUE];
  IF startStatus # ok THEN RETURN;
  GVSend.AddRecipient[connection, recipient];
  IF GVSend.CheckValidity[connection, NIL] # 1 THEN ERROR;
  GVSend.StartText[connection];
  GVSend.AddToItem[connection, text];
  GVSend.Send[connection];
  };
CheckNameServer: PROC = {
  -- Runs one round of probes over tests. Each entry is queried, its failed flag is updated, and,
  -- for entries with sendMessage set, mail is sent when the flag changes in either direction.
  -- With stats set and a log attached, the histogram of the entry is printed after its probe.
  -- An entry whose server is NIL ends the round early.
  how: ROPE ← NIL;
  FOR entry: LIST OF TestRecord ← tests, entry.rest WHILE entry # NIL DO
    server: ROPE ← entry.first.server;
    wasFailed: BOOLEAN ← entry.first.failed;
    nowFailed: BOOLEAN;
    IF server = NIL THEN EXIT;
    [entry.first.failed, how] ← Query[server, entry.first.queryRope, entry.first.expectAddrs, entry.first.timeOut, entry.first.histogram];
    nowFailed ← entry.first.failed;
    IF entry.first.sendMessage THEN {
      wentDown: BOOLEAN = nowFailed AND ~wasFailed;
      cameBack: BOOLEAN = wasFailed AND ~nowFailed;
      IF wentDown THEN {
        IF log # NIL THEN Report[log, "Sending failure message to: ", recipient, "\n"];
        SendTheMessage[server, TRUE, how]}
      ELSE IF cameBack THEN {
        IF log # NIL THEN Report[log, "Sending up message to: ", recipient, "\n"];
        SendTheMessage[server, FALSE]};
      };
    IF log # NIL AND stats THEN {
      PrintStats[log, entry.first.histogram];
      IO.Flush[log]};
    ENDLOOP;
  };