diff -uNr a/awklogbot/logbot.awk b/awklogbot/logbot.awk
--- a/awklogbot/logbot.awk false
+++ b/awklogbot/logbot.awk 455214c8ec731069aef8ffaa7f60402bdeb639a7c0db18fc953f95c7a19242114a72e292a8eb314df20a9e8f297a36f6b0880b9bd3809e0b2af431e203319896
@@ -0,0 +1,453 @@
+#!/usr/bin/gawk -f
+
+###########################################################################
+# Multi-chan irc bot *prototype* citing content and logging lines to 2 local mysql databases:
+## raw log lines to a log db
+## formatted log lines for real-time publication to an mp-wp database
+# for an example of mpwp output see e.g. http://ossasepia.com/2020/09/01/ossasepia-logs-for-Sep-2020/
+#
+# The full decoupling of raw logging from formatted publishing was the next step that never went live: the bot would then just log the raw lines and leave the rest to a mysql trigger.
+# NB: make SURE that the escaping and formatting are fully working the way you want them to, so that you don't get any surprises, especially if you expose the bot to fully public (unfiltered) input.
+# NB: CHANGE in the BEGIN section further down the network, credentials, categories, bot names, owner, URL to those relevant to you.
+#
+# Known issues that never got looked into:
+## unicode characters will look like the shit that they are;
+## the % character apparently gets an extra \ somewhere in the pipeline to publication.
+#
+# NOTE(review): the html markup that log2mpwp() and emptypost are built around appears to have been stripped from this copy
+# (empty search markers, identical bot/non-bot branches); restore it from the upstream source before relying on publication.
+#
+# This bot reacts to:
+## its owner's commands in pm
+## known URLs in chan by citing the corresponding content
+#
+# This code requires:
+## gawk (1.3 on CentOS 6 known to work, possibly any version supporting sockets will work just as well, see https://www.gnu.org/software/gawk/manual/html_node/TCP_002fIP-Networking.html)
+## keksum or equivalent (see http://fixpoint.welshcomputing.com/2019/keksum-a-keccak-implementation-in-c-as-standalone-unix-utility-genesis/)
+############################################################################
+
+#connecting to the specified service using given credentials
+# network_: gawk two-way network special file (e.g. /inet/tcp/0/host/port), used with |&
+# nick_, pass_: nick to register and the password sent to NICKSERV IDENTIFY
+function connect(network_, nick_, pass_) {
+  print "Connecting to " network_
+  #is this REALLY needed?
+  network_ |& getline
+  print $0
+  print "NICK " nick_ |& network_
+  print "USER " nick_ " " nick_ " " nick_ " :" nick_ |& network_
+  print "NICKSERV IDENTIFY " nick_ " " pass_ |& network_
+}
+
+#joining a list of chans with some initial delay
+# chans_: array whose values are the chan names; delay_: seconds handed to sleep
+function joinall(net_, chans_, delay_,    c) {
+  system("sleep " delay_); #wait a bit
+  for (c in chans_)
+    print "JOIN " chans_[c] |& net_
+}
+
+#quote for keksum - minimal
+# the result is placed inside a $'...' shell word (see log2db), so only \ and ' are escaped
+function quotesc( txt_) {
+  #"\\\\&" = one literal \ plus the matched \ : this doubles it under both the old and the POSIX gsub rules
+  gsub(/\\/, "\\\\&", txt_);
+  gsub(/'/, "\\'", txt_);
+# gsub(/`/, "\\`", txt_); #not needed since not interpreted
+# gsub(/\$/, "\\$", txt_); #not needed since not interpreted
+  return txt_;
+}
+
+#escape for mysql
+# the result goes into a '...' SQL literal that sits inside a "..." shell word (see log2db)
+function mysqlescape( txt_) {
+  #each \ must become four: the shell halves them inside "..." and mysql halves them again.
+  #doubling twice with "\\\\&" gives that on old and new gawk alike (a plain "\\\\" replacement means \\ or \ depending on version).
+  gsub(/\\/, "\\\\&", txt_);
+  gsub(/\\/, "\\\\&", txt_);
+# gsub(/%/, "\\%", txt_); #not needed?
+# gsub(/_/, "\\_", txt_); #not needed?
+  gsub(/"/, "\\\"", txt_);
+  gsub(/'/, "\\'", txt_);
+  gsub(/`/, "\\`", txt_);
+  #$ starts an expansion inside "..." in the shell, so it has to be neutralised as well
+  gsub(/\$/, "\\$", txt_);
+  return txt_;
+}
+
+#escape for html
+# the result is published as html but still travels through the shell and mysql, hence the \ $ ` treatment too
+function htmlescape( txt_) {
+# gsub(/%/, "\\%", txt_); #mysql requirement (?)
+
+  gsub(/\\/, "\\\\&", txt_); #doubled twice so that one \ survives the shell and mysql
+  gsub(/\\/, "\\\\&", txt_);
+  #NOTE(review): the entity names were lost in this copy and are reconstructed ("\\&" is a literal & in a gsub replacement) - confirm against upstream
+  gsub(/&/, "\\&amp;", txt_);
+  gsub(/\"/, "\\&quot;", txt_);
+  gsub(/'/, "\\&#039;", txt_);
+  gsub(/</, "\\&lt;", txt_);
+  gsub(/>/, "\\&gt;", txt_);
+
+  gsub(/\$/, "\\$", txt_);
+  gsub(/`/, "\\`", txt_);
+
+  return txt_;
+}
+
+#returns line_time of line number_ in chan room_ (room_ comes with its leading #, which is dropped for the query)
+function finddateoflogline( logcmdbase_, logtable_, room_, number_,    findcmd, logln) {
+  findcmd=logcmdbase_ "\"select line_time from " logtable_ " where chan='" substr(room_, 2, length(room_)-1) "' and line_number=" number_ ";\"";
+# print "Running: " findcmd;
+  logln=""
+  findcmd | getline logln;
+  close(findcmd);
+  #RS is \r\n (set in BEGIN) so the \n printed by mysql is still part of the record
+  return substr(logln, 1, length(logln)-1); #without trailing newline
+}
+
+#returns "time (#chan) speaker: payload" for line number_ of chan room_ (room_ WITHOUT leading #); empty or blank if there is nothing to cite
+#room_ and number_ are cut out of text said in chan, so anything that could break out of the sql/shell quoting is refused
+function findlogline( logcmdbase_, logtable_, room_, number_,    findcmd, logln) {
+  if (room_ !~ /^[[:alnum:]_.]+$/ || number_ !~ /^[[:digit:]]+$/)
+    return "";
+  findcmd=logcmdbase_ "\"select coalesce(concat(line_time, ' (#', chan, ') ', speaker, ': ', payload), ' ') from " logtable_ " where chan='" room_ "' and line_number=" number_ ";\"";
+# print "Running: " findcmd;
+  logln=""
+  findcmd | getline logln;
+  close(findcmd);
+  return substr(logln, 1, length(logln)-1); #without trailing newline
+}
+
+#logs to logger's own db - raw lines + hash
+#NOT meant for pms - it will log it as happening in chan "room_"
+#returns the new line number, or -1 if the line could not be numbered or hashed
+function log2db(logcmdbase_, logtable_, room_, payload_, datetime_, fromuser_,    chanf, cntcmd, lno, hashcmd, hash, insertcmd) {
+  chanf=substr(room_, 2, length(room_)-1); #remove starting #
+  #logging to raw logs db
+  #get last line_number, if any
+  cntcmd=logcmdbase_ "\"select coalesce(max(line_number), 1000000) from " logtable_ " where chan='" chanf "';\" ";
+  lno="";
+  cntcmd | getline lno;
+  close(cntcmd);
+  if (lno !~ /^[[:digit:]]+/) {
+    print "ERROR: failed to get last line number for chan: " chanf
+    return -1;
+  }
+  lno=lno+1;
+  #calculate kekhash
+  #NB: $'...' is not plain sh syntax - the shell that gawk starts for the pipe has to understand it
+  hashcmd="echo -n $'" quotesc(payload_) "' | keksum -s256"
+  hash=""
+  hashcmd | getline hash;
+  close(hashcmd);
+  if (hash=="") {
+    print "ERROR: failed to get hash for line: " payload_
+    print "hashcmd: " hashcmd
+    return -1;
+  }
+  #remove the newline from end of hash too
+  hash=substr(hash, 1, length(hash)-1);
+  #got hash, so now log line, hm
+  insertcmd=logcmdbase_ "\"INSERT into " logtable_ " (line_number, line_time, chan, speaker, payload, kekhash) VALUES (";
+  # line_number and date+time
+  insertcmd = insertcmd lno ", STR_TO_DATE('" datetime_ "', '%Y-%m-%d %H:%i:%s')";
+  #chan, speaker, payload, hash
+  insertcmd = insertcmd ", '" chanf "', '" mysqlescape(fromuser_) "', '" mysqlescape(payload_) "', '" hash "');\" "
+  #done, so run it
+# print "Running: '" insertcmd "'";
+  system(insertcmd);
+  return lno;
+}
+
+#runs pidcmd_ and returns the post id it prints, or -1 when it prints none
+function readpid(pidcmd_,    raw) {
+  raw="";
+  pidcmd_ | getline raw;
+  close(pidcmd_);
+  if (raw ~ /^[[:digit:]]+/)
+    return raw+0;
+  return -1;
+}
+
+#logging a line to the mpwp database - aka for publishing basically
+# finds the monthly post of room_ (creating, categorising and publishing it if missing) and adds the escaped line to it
+# mm_: month number -> short name; categ_: chan name -> category id; bots_: set of bot nicks; prevdate_: time of the previous line, "" if none
+#NOTE(review): in this copy the replace() calls below search for '' and the link and bot/non-bot wrappers are empty or identical;
+# the html markup they carried appears to have been stripped, so restore it from upstream - as written replace() has no marker to find.
+function log2mpwp(mpwpcmdbase_, mpwptable_, categtable_, mpwpuserid_, emptypost_, maincateg_, room_, payload_, datetime_, fromuser_, lineid_, categ_, mm_, bots_, prevdate_,    dd, tt, postname, dbpostname, topublish, pid_, pidcmd, cmd_, categs1, categs2, thisd, cmd_day, pload_, p1, who_, pubcmd) {
+  #get id of existing post - if any
+  dd=substr(datetime_, 1, 10); #day
+  tt=substr(datetime_, 12, 5); #time hh:mm
+  postname=room_ " Logs for " mm_[substr(dd, 6,2)] " " substr(dd, 1, 4); #monthly, NOT daily!
+  dbpostname=substr(room_,2) "-logs-for-" mm_[substr(dd, 6,2)] "-" substr(dd, 1, 4);
+  topublish=0; #no need to publish, it's already published
+  pidcmd = mpwpcmdbase_ "\"select coalesce(ID, -1) from " mpwptable_ " where post_name ='" dbpostname "';\" ";
+# print "Running query: " pidcmd
+  pid_ = readpid(pidcmd);
+  if (pid_ <= 0) {
+    #no existing article, so insert it
+    cmd_ = mpwpcmdbase_ "\" insert into " mpwptable_ " (post_author, post_content, post_title, post_status, post_name, post_modified, post_date) values (" mpwpuserid_ ", '" emptypost_ "', '" postname "', 'draft', '" dbpostname "', now(), now());\" ";
+#   print "Running insert article: " cmd_
+    system(cmd_);
+    #get pid again and update category too!
+    pid_ = readpid(pidcmd);
+    if (pid_ <= 0) {
+      print "ERROR: failed to get pid AFTER insert: " pidcmd;
+    }
+    else {
+      categs1=mpwpcmdbase_ "\"insert into " categtable_ " (object_id, term_taxonomy_id, term_order) values (" pid_ ", " maincateg_ ", 0);\" "
+#     print "Running insert: " categs1;
+      system(categs1);
+      categs2=mpwpcmdbase_ "\"insert into " categtable_ " (object_id, term_taxonomy_id, term_order) values (" pid_ ", " categ_[substr(room_, 2)] ", 0);\" "
+#     print "Running insert: " categs2;
+      system(categs2);
+      #publish the article too, as otherwise it ends up needing manual publish, ugh.
+      topublish=1;
+    }
+  }
+  #the above should have got the pid; without one there is no post to update
+  if (pid_ <= 0)
+    return;
+
+  #first check if date has changed ie if an additional "day" line is needed
+  if (length(prevdate_) >= 10) {
+    #there IS a previous date, so compare the days
+    thisd=dd;
+    if (thisd != substr(prevdate_,1,10)) {
+      #if day is not the same, insert a line with link
+      cmd_day = mpwpcmdbase_ "\"update " mpwptable_ " set post_content=replace(post_content, '', '\\n\\nDay changed to " thisd "\\n') where id=" pid_ ";\" "
+#     print "Running query: '" cmd_day "'"
+      system(cmd_day);
+    }
+  }
+
+  #update the article with the new line
+  pload_=htmlescape(payload_); #escape html and mysql/shell characters
+  who_=htmlescape(fromuser_); #nicks may contain ` and \ so they get the same treatment before reaching the shell
+  #treatment of links, either plain http/https or [][] format
+  p1=gensub(/(http[^ \[\]]+)/, "\\1", "g", pload_);
+  #links [][]
+  pload_=gensub(/(\[[^>\]]*>)(http[^<]*)(<\/a>\]\[)([^\[\]]*)(\])/, "\\4", "g", p1);
+
+  if (fromuser_ in bots_)
+    cmd_ = mpwpcmdbase_ "\"update " mpwptable_ " set post_content=replace(post_content, '', '\\n\\n" who_ ":\\n" pload_ "\\n[" tt "]\\n') where id=" pid_ ";\" "
+  else
+    cmd_ = mpwpcmdbase_ "\"update " mpwptable_ " set post_content=replace(post_content, '', '\\n\\n" who_ ":\\n" pload_ "\\n[" tt "]\\n') where id=" pid_ ";\" "
+# print "Running query: '" cmd_ "'"
+  system(cmd_);
+  #check and publish, if needed
+  if (topublish > 0) {
+    pubcmd = mpwpcmdbase_ "\" update " mpwptable_ " set post_status='publish' where id=" pid_ ";\" "
+#   print "Running publish query: " pubcmd
+    system(pubcmd);
+  }
+  #nothing else for now!
+}
+
+#returns the current date and time as printed by date(1), formatted YYYY-MM-DD HH:MM:SS
+function getdatetime(    ln, datetime_) {
+  #get today's date and time as it will be needed anyway
+  ln="date '+%Y-%m-%d %H:%M:%S'"
+  ln | getline datetime_
+  close(ln)
+  datetime_=substr(datetime_,1, length(datetime_)-1) #remove newline char
+  return datetime_
+}
+
+#says ss in the current room, then logs it like any other line; empty or blank ss is ignored
+#this function relies on global variables, ugh.
+# (room, ircnet, nick and the db settings from BEGIN; it also overwrites datetime, lid and prevdate)
+function sayline( ss ) {
+  if (ss != "" && ss != " ") {
+    print "PRIVMSG " room " :" ss |& ircnet
+    datetime=getdatetime();
+    lid = log2db(logcmdbase, logtable, room, ss, datetime, nick);
+#   print "Log2db returned lid: " lid
+    if (lid > 0) {
+      prevdate=finddateoflogline(logcmdbase, logtable, room, lid-1);
+      log2mpwp(mpwpcmdbase, mpwptable, categtable, mpwpuserid, emptypost, maincateg, room, ss, datetime, nick, lid, categ, mm, bots, prevdate);
+    }
+  } #end of saying bot's own line
+}
+
+BEGIN {
+  #redefine record separators to match IRC style CR-LF separators of lines
+  RS = ORS = "\r\n"
+
+  #network to connect to
+  ircnet="/inet/tcp/0/chat.freenode.net/6667"
+
+  # CHANGE those to the values relevant for you
+  #credentials, owner and chans to join automatically
+  nick="sonofawitch"
+  pswd="yourownpassword"
+  owner="diana_coman"
+  cmdprefix="!s"
+  #add as many/few chans as you need; see also categories, further down
+  chans["#ossasepia"]="#ossasepia"
+  chans["#eulora"]="#eulora"
+
+  #logdb credentials for raw logging
+  logdbuser="logdbusername"
+  logdbpswd="logdbpassword"
+  logdb="logdatabasename"
+  logtable="logtablename"
+
+  #mpwp credentials for publishing rather than logging
+  mpwpdbuser="mpwpdatabaseuser"
+  mpwpdbpswd="mpwpdatabasepassword"
+  mpwpdb="mpwpdatabasename"
+  mpwptable="your_posts_table"
+  categtable="your_term_relationships"
+  maincateg=111 #change according to categories in your mpwp blog!
+  mpwpuserid=1 #id of wordpress "author" user for log articles
+  #NOTE(review): only a line break is left of the initial post content in this copy; it presumably held the marker that log2mpwp replaces - restore from upstream
+  emptypost="\n"
+
+  #NB: -s silent -N no column names -e execute & quit
+  mpwpcmdbase="mysql --user='" mpwpdbuser "' --password='" mpwpdbpswd "' --database='" mpwpdb "' -sNe "
+  logcmdbase="mysql --user='" logdbuser "' --password='" logdbpswd "' --database='" logdb "' -sNe "
+
+  #categories based on chan - change and/or add according to your mpwp categories
+  categ["ossasepia"]=112
+  categ["eulora"]=113
+
+  #bots - to gray out their lines
+  bots["a111"]=1;
+  bots["deedbot"]=1;
+  bots["feedbot"]=1;
+  bots["auctionbot"]=1;
+  bots["lobbesbot"]=1;
+  bots["snsabot"]=1;
+  bots["ericbot"]=1;
+  bots["drunkenbot"]=1;
+  bots["ossabot"]=1;
+  bots["sonofawitch"]=1;
+
+  #months for easy date conversion
+  mm["01"]="Jan";
+  mm["02"]="Feb";
+  mm["03"]="Mar";
+  mm["04"]="Apr";
+  mm["05"]="May";
+  mm["06"]="Jun";
+  mm["07"]="Jul";
+  mm["08"]="Aug";
+  mm["09"]="Sep";
+  mm["10"]="Oct";
+  mm["11"]="Nov";
+  mm["12"]="Dec";
+
+  #need to set it BEFORE connect, ofc: timer to reconnect if nothing arrives.
+  #2 minutes should be enough for everyone...
+  PROCINFO[ircnet, "READ_TIMEOUT"]=120000
+
+  #connect, delay, join
+  connect(ircnet, nick, pswd);
+  joinall(ircnet, chans, 1);
+
+  #loop reading lines and processing
+  active=1;
+  do {
+    if ( (ircnet |& getline) > 0) {
+      if ($1=="PING") {
+        msg="PONG " $2;
+        print msg |& ircnet
+      }
+      else if ($2 != "PRIVMSG") {
+        # ignore all non "privmsg" ; easier than ignoring NOTICES explicitly
+        print "Ignoring line: " $0 #print to stdout if that's logged.
+      }
+      else {
+        #PRIVMSG received so extract the username, ignoring host and/or cloak
+        fromuser = substr($0,2,index(substr($0,2),"!")-1)
+        room=$3;
+        prefix=substr($4,2);#ditch the starting :
+        usrcmd=$5;
+        #the text is everything after the first " :" of the line; taking it from $0 as received keeps its spacing intact
+        sep=index($0, " :");
+        payload=(sep > 0) ? substr($0, sep+2) : "";
+        #check and treat separately the "ACTION" lines because they stink.
+        #only a real CTCP ACTION (0x01 ACTION text 0x01) counts, not any line that merely contains the word
+        if (payload ~ /^\001ACTION /) {
+          payload=substr(payload, length("\001ACTION ")+1);
+          sub(/\001$/, "", payload);
+        }
+        #chan stuff (from the list; theoretically it can't not be in the list, either)
+        if (room in chans) {
+          #log it, first
+          datetime=getdatetime();
+          lid = log2db(logcmdbase, logtable, room, payload, datetime, fromuser);
+          if (lid > 0) {
+            prevdate=finddateoflogline(logcmdbase, logtable, room, lid-1);
+            log2mpwp(mpwpcmdbase, mpwptable, categtable, mpwpuserid, emptypost, maincateg, room, payload, datetime, fromuser, lid, categ, mm, bots, prevdate);
+          }
+          else print "Got lid<0 : " lid
+
+          #react to it: citing from ossasepia.com - CHANGE THIS to your url.
+          tmp = payload;
+          do {
+            idx = match(tmp, /(http:\/\/ossasepia.com\/[^\/]*\/[^\/]*\/[^\/]*\/)([^\-]*)(-[^#]*#)([[:digit:]]+)/, arr);
+            if (idx > 0) {
+              s = findlogline(logcmdbase, logtable, arr[2], arr[4]);
+              sayline(s); #say it, log it, the whole thing
+              tmp = substr(tmp, idx+length(arr[0]));
+            }
+          } while (idx > 0 && length(tmp) > 21); #needs at least http://ossasepia.com
+
+          #react to it: commands
+          s=""
+          if (prefix == cmdprefix && usrcmd=="hi")
+            s = "Hello there, " fromuser
+          else if (prefix == cmdprefix && usrcmd=="help")
+            s = "!s is my prefix for commands: hi, help"
+          #else if (prefix == cmdprefix && usrcmd=="s") #search
+
+          #say it AND log it
+          sayline(s);
+
+        }#end of room in chans, try maybe it's PM with owner
+        else if (room == nick && fromuser == owner) {
+          #hence, PM to the bot; if room != nick, it's a message in chan=room so do NOT speak
+          room = fromuser;
+          if (prefix == cmdprefix && usrcmd == "quit") {
+            print "QUIT :Disconnecting." |& ircnet
+            active = 0; #not active anymore, should exit
+          } #end quit
+          else if (prefix == cmdprefix && usrcmd=="reconnect") {
+            print "QUIT :Disconnecting." |& ircnet
+            close(ircnet)
+            #wait a bit (?) - it will/should anyway happen when failing to read from the closed socket
+            connect(ircnet, nick, pswd);
+            joinall(ircnet, chans, 1);
+          } #end reconnect
+          else if (prefix == cmdprefix && usrcmd=="join") {
+            chan=$6
+            print "JOIN " chan |& ircnet
+            #add chan to list
+            chans[chan]=chan;
+          }
+        } #end of PM with owner
+        #else neither in chan nor pm with owner so ignore it
+      }#every type of message is covered at least
+    } #timeout or fail on reading from socket, hm.
+    else {
+      if (ERRNO != "")
+        print "ERROR: " ERRNO
+      printf("WILL RECONNECT...");
+      close(ircnet);
+      system("sleep 1"); #wait a bit
+      connect(ircnet, nick, pswd);
+      system("sleep 1"); #wait a bit
+      for (c in chans)
+        print "JOIN " chans[c] |& ircnet
+      active = 1;
+    }
+  } while (active > 0);
+}#end BEGIN
+
+
+END {
+  close(ircnet);
+}
diff -uNr a/awklogbot/manifest b/awklogbot/manifest
--- a/awklogbot/manifest false
+++ b/awklogbot/manifest e6c6a79f3b29ee2f05d76e44efc2b0c26a26320ce6a2cfe30dc4b8e22763fc98367a563f26f2b21cc4ebc57b75f9acca6eaf698f2d27344c59ad3b6ff3458d76
@@ -0,0 +1 @@
+652827 gawk_logbot_genesis diana_coman Multi-chan irc bot citing from known URL, logging raw lines to a local mysql logtable and formatted lines for publication to a local mpwp mysql table.