diff -uNr a/awklogbot/logbot.awk b/awklogbot/logbot.awk --- a/awklogbot/logbot.awk false +++ b/awklogbot/logbot.awk 455214c8ec731069aef8ffaa7f60402bdeb639a7c0db18fc953f95c7a19242114a72e292a8eb314df20a9e8f297a36f6b0880b9bd3809e0b2af431e203319896 @@ -0,0 +1,413 @@ +#!/usr/bin/gawk -f + +########################################################################### +# Multi-chan irc bot *prototype* citing content and logging lines to 2 local mysql databases: +## raw log lines to a log db +## formatted log lines for real-time publication to an mp-wp database +# for an example of mpwp output see e.g. http://ossasepia.com/2020/09/01/ossasepia-logs-for-Sep-2020/ +# +# The full decoupling of raw logging from formatted publishing was the next step that never went live: the bot would then just log the raw lines and leave the rest to a mysql trigger. +# NB: make SURE that the escaping and formatting are fully working the way you want them to, so that you don't get any surprises, especially if you expose the bot to fully public (unfiltered) input. +# NB: CHANGE in the BEGIN section further down the network, credentials, categories, bot names, owner, URL to those relevant to you. +# +# Known issues that never got looked into: +## unicode characters will look like the shit that they are; +## the % character apparently gets an extra \ somewhere in the pipeline to publication. 
+#
+# This bot reacts to:
+## its owner's commands in pm
+## known URLs in chan by citing the corresponding content
+#
+# This code requires:
+## gawk (1.3 on CentOS 6 known to work, possibly any version supporting sockets will work just as well, see https://www.gnu.org/software/gawk/manual/html_node/TCP_002fIP-Networking.html)
+## keksum or equivalent (see http://fixpoint.welshcomputing.com/2019/keksum-a-keccak-implementation-in-c-as-standalone-unix-utility-genesis/)
+############################################################################
+
+#connecting to the specified service using given credentials
+# network_ : two-way (|&) I/O target to talk to
+# nick_    : sent as nick, as all three USER fields + realname, and as the NICKSERV account
+# pass_    : NICKSERV password
+#Reads and echoes one record from network_ first, then sends NICK, USER and
+#NICKSERV IDENTIFY; no reply is checked and nothing is returned.
+function connect(network_, nick_, pass_) {
+  print "Connecting to " network_
+  #is this REALLY needed?
+  network_ |& getline
+  print $0
+  print "NICK " nick_ |& network_
+  print "USER " nick_ " " nick_ " " nick_ " :" nick_ |& network_
+  print "NICKSERV IDENTIFY " nick_ " " pass_ |& network_
+}
+
+#joining a list of chans with some initial delay
+# net_   : two-way (|&) I/O target the JOIN lines are written to
+# chans_ : array whose VALUES are the channel names (the keys are ignored)
+# delay_ : argument handed to the external "sleep" command
+#NB: the loop variable c is a global, not a local.
+function joinall(net_, chans_, delay_) {
+  system("sleep " delay_); #wait a bit
+  for (c in chans_)
+    print "JOIN " chans_[c] |& net_
+}
+
+#quote for keksum - minimal
+#Returns txt_ made safe for use inside a shell $'...' string (this is how
+#log2db embeds it): every backslash is doubled, every ' becomes \'.
+function quotesc( txt_) {
+  #"&" in a replacement stands for the matched text, so "&&" doubles the
+  #backslash on every gawk. The former "\\\\" gave two backslashes on
+  #gawk 3.1.x but only ONE under the POSIX sub/gsub rules that gawk 4.0+
+  #uses by default, i.e. it silently stopped escaping.
+  gsub(/\\/, "&&", txt_);
+  gsub(/'/, "\\'", txt_);
+#  gsub(/`/, "\\`", txt_);	#not needed since not interpreted
+#  gsub(/\$/, "\\$", txt_);	#not needed since not interpreted
+  return txt_;
+}
+
+#escape for mysql
+#Returns txt_ made safe for use inside a '...' SQL literal that itself sits
+#inside a double-quoted shell argument (this is how log2db embeds it).
+function mysqlescape( txt_) {
+  #one backslash -> four: the shell's double quotes halve them, mysql halves
+  #them again. "&&&&" gives four on every gawk; the former "\\\\\\\\" gave
+  #four on gawk 3.1.x but only two under the POSIX rules of gawk 4.0+.
+  gsub(/\\/, "&&&&", txt_);
+#  gsub(/%/, "\\%", txt_);	#not needed?
+#  gsub(/_/, "\\_", txt_);	#not needed?
+  gsub(/"/, "\\\"", txt_);
+  gsub(/'/, "\\'", txt_);
+  gsub(/`/, "\\`", txt_);
+  #$ must be escaped too: inside shell double quotes an unescaped $ starts
+  #variable expansion / $(command) substitution on whatever was said in chan.
+  gsub(/\$/, "\\$", txt_);
+  return txt_;
+}
+
+#escape for html
+#Returns txt_ with &, ", < and > turned into html entities and with \, ', $
+#and ` backslash-escaped.
+function htmlescape( txt_) {
+#  gsub(/%/, "\\%", txt_);	#mysql requirement (?)
+
+  #one backslash -> four, written so that it does not depend on gawk version
+  gsub(/\\/, "&&&&", txt_);
+  #"\\&" in a replacement is a literal ampersand; & has to go first so that
+  #the entities produced below are not escaped a second time
+  gsub(/&/, "\\&amp;", txt_);
+  gsub(/\"/, "\\&quot;", txt_);
+  #NOTE(review): kept as found (yields \'); confirm whether an html entity
+  #was intended here instead.
+  gsub(/'/, "\\'", txt_);
+  gsub(/</, "\\&lt;", txt_);
+  gsub(/>/, "\\&gt;", txt_);
+
+  gsub(/\$/, "\\$", txt_);
+  gsub(/`/, "\\`", txt_);
+
+  return txt_;
+}
+
+#Looks up line_time of line number_ of chan room_ in logtable_.
+# logcmdbase_ : shell command prefix; the double-quoted query is appended to it
+# room_       : channel name WITH its leading character (it is dropped for the query)
+#Returns the first record of the command's output minus its last character.
+#NOTE(review): dropping the last character is only right if RS is not "\n"
+#at this point (e.g. RS set to "\r\n" elsewhere) - confirm.
+#NB: findcmd and logln are globals. room_ and number_ go into the query unescaped.
+function finddateoflogline( logcmdbase_, logtable_, room_, number_) {
+  findcmd=logcmdbase_ "\"select line_time from " logtable_ " where chan='" substr(room_, 2, length(room_)-1) "' and line_number=" number_ ";\"";
+#  print "Running: " findcmd;
+  logln=""
+  findcmd | getline logln;
+  close(findcmd);
+  return substr(logln, 1, length(logln)-1); #without trailing newline
+}
+
+#Fetches line number_ of chan room_ from logtable_ formatted as
+#"line_time (#chan) speaker: payload".
+# logcmdbase_ : shell command prefix; the double-quoted query is appended to it
+# room_       : channel name used AS IS in the query (unlike finddateoflogline,
+#               nothing is dropped from it)
+#Returns the first record of the command's output minus its last character.
+#NB: findcmd and logln are globals. room_ and number_ go into the query unescaped.
+function findlogline( logcmdbase_, logtable_, room_, number_) {
+  findcmd=logcmdbase_ "\"select coalesce(concat(line_time, ' (#', chan, ') ', speaker, ': ', payload), ' ') from " logtable_ " where chan='" room_ "' and line_number=" number_ ";\"";
+#  print "Running: " findcmd;
+  logln=""
+  findcmd | getline logln;
+  close(findcmd);
+  return substr(logln, 1, length(logln)-1); #without trailing newline
+}
+
+#logs to logger's own db - raw lines + hash
+#NOT meant for pms - it will log it as happening in chan "room_"
+# logcmdbase_ : shell command prefix; the double-quoted query is appended to it
+# room_       : channel name WITH its leading character (dropped before use)
+# datetime_   : timestamp in the '%Y-%m-%d %H:%i:%s' layout given to STR_TO_DATE
+#Returns the line number used for the insert, or -1 if no hash could be obtained
+#(nothing is inserted in that case). The result of the insert is not checked.
+#NB: cntcmd, lno, hashcmd, hash, insertcmd and chanf are globals.
+function log2db(logcmdbase_, logtable_, room_, payload_, datetime_, fromuser_) {
+  #logging to raw logs db
+  #get last line_number, if any
+  #(a chan without any line yet starts counting from 1000000, so its first line is 1000001)
+  cntcmd=logcmdbase_ "\"select coalesce(max(line_number), 1000000) from " logtable_ " where chan='" substr(room_, 2, length(room_)-1) "';\" ";
+  cntcmd | getline lno;
+  close(cntcmd);
+  lno=lno+1;
+  #calculate kekhash
+  hashcmd="echo -n $'" quotesc(payload_) "' | keksum -s256"
+  hash=""
+  hashcmd | getline hash;
+  close(hashcmd);
+  if (hash=="") {
+    print "ERROR: failed to get hash for line: " payload_
+    print "hashcmd: " hashcmd
+    return -1;
+  }
+  else {
+    #remove the newline from end of hash too
+    hash=substr(hash, 1, length(hash)-1);
+    #got hash, so now log line, hm
+    insertcmd=logcmdbase_ "\"INSERT into " logtable_ " (line_number, line_time, chan, speaker, payload, kekhash) VALUES (";
+    # line_number and date+time
+    insertcmd = insertcmd lno ", STR_TO_DATE('" datetime_ "', '%Y-%m-%d %H:%i:%s')";
+    #chan, speaker, payload, hash
+    chanf=substr(room_, 2, length(room_)-1); #remove starting #
+    insertcmd = insertcmd ", '" chanf "', '" mysqlescape(fromuser_) "', '" mysqlescape(payload_) "', '" hash "');\" "
+    #done, so run it
+#    print "Running: '" insertcmd "'";
+    system(insertcmd);
+  }
+  return lno;
+}
+
+#logging a line to the mpwp database - aka for publishing basically
+function log2mpwp(mpwpcmdbase_, mpwptable_, categtable_, mpwpuserid_, emptypost_, maincateg_, room_, payload_, datetime_, fromuser_, lineid_, categ_, mm_, bots_, prevdate_) {
+  #get id of existing post - if any
+  dd=substr(datetime_, 1, 10); #day
+  tt=substr(datetime_, 12, 5); #time hh:mm
+  postname=room_ " Logs for " mm_[substr(dd, 6,2)] " " substr(dd, 1, 4); #monthly, NOT daily!
+  dbpostname=substr(room_,2) "-logs-for-" mm_[substr(dd, 6,2)] "-" substr(dd, 1, 4);
+  topublish=0; #no need to publish, it's already published
+  pid_ = -1;
+  pidcmd = mpwpcmdbase_ "\"select coalesce(ID, -1) from " mpwptable_ " where post_name ='" dbpostname "';\" ";
+#  print "Running query: " pidcmd
+  pidcmd | getline pid_;
+  close(pidcmd);
+  pid_ = substr(pid_, 1, length(pid_)-1); #remove newline
+  if (pid_ <= 0) {
+    #no existing article, so insert it
+    cmd_ = mpwpcmdbase_ "\" insert into " mpwptable_ " (post_author, post_content, post_title, post_status, post_name, post_modified, post_date) values (" mpwpuserid_ ", '" emptypost_ "', '" postname "', 'draft', '" dbpostname "', now(), now());\" ";
+#    print "Running insert article: " cmd_
+    system(cmd_);
+    #get pid again and update category too!
+ pidcmd | getline pid_; + close(pidcmd); + pid_ = substr(pid_, 1, length(pid_)-1); #remove newline + if (pid_ <= 0) { + print "ERROR: failed to get pid AFTER insert: " pidcmd; + } + else { + categs1=mpwpcmdbase_ "\"insert into " categtable_ " (object_id, term_taxonomy_id, term_order) values (" pid_ ", " maincateg_ ", 0);\" " +# print "Running insert: " categs1; + system(categs1); + categs2=mpwpcmdbase_ "\"insert into " categtable_ " (object_id, term_taxonomy_id, term_order) values (" pid_ ", " categ_[substr(room_, 2)] ", 0);\" " +# print "Running insert: " categs2; + system(categs2); + #publish the article too, as otherwise it ends up needing manual publish, ugh. + topublish=1; + } + } + #the above should have got the pid, but check again, just in case + if (pid_ != -1) { + #first check if date has changed ie if an additional "day" line is needed + if (length(prevdate_) >= 10) { + #there IS a previous date, so split and compare + prevd=substr(prevdate_,1,10); + thisd=substr(dd,1,10); + if (thisd != prevd) { + #if day is not the same, insert a line with link + cmd_day = mpwpcmdbase_ "\"update " mpwptable_ " set post_content=replace(post_content, '', '\\n