+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Telegram = "1da6f4ae-116c-4c38-8ee9-19974ff3601d"
+
 [compat]
 julia = "1"
diff --git a/src/TelegramMarkov.jl b/src/TelegramMarkov.jl
index e3d9931..db53eaf 100644
--- a/src/TelegramMarkov.jl
+++ b/src/TelegramMarkov.jl
@@ -1,5 +1,69 @@
 module TelegramMarkov
 
-# Write your package code here.
+# Generic functions, declared here so that the submodules can add methods to them.
+function initialize end
+function make_sentence end
+function list_usernames end
+
+"""
+    fetch_in_env(varname)
+
+Return the value of the environment variable `varname`.
+
+When the variable is not set, log an error explaining how to set it and return an
+empty string.
+"""
+function fetch_in_env(varname)
+    haskey(ENV, varname) && return ENV[varname]
+    error_string = """
+    Please, set $varname in your environment. You can either set it in Julia by doing `ENV["$varname"]=something` or in your shell `export $varname=something`.
+    """
+    @error error_string
+    return ""
+end
+
+
+include("bot.jl")
+
+include("markov.jl")
+
+include("server.jl")
+
+import .Markov: initialize, make_sentence, list_usernames
+
+"""
+    run_server()
+
+Initialise the Markov chains, build the application and poll Telegram for updates.
+
+Every update holding a message is handed to the application; a result other than
+`nothing` is sent back as a reply to that message.
+"""
+function run_server()
+    @info "Initializing bot"
+    Markov.initialize()
+    application = Server.application()
+    @info "Starting Telegram bot now"
+    Bot.run_bot(Bot.TELEGRAM_BOT) do message
+        @debug "Got a message" message
+        # Skip the updates holding no `:message` instead of failing on the lookup.
+        haskey(message, :message) || return nothing
+        request = Dict{Symbol, Any}(:message => message[:message], :params => Dict())
+        ret = application(request)
+        @debug "I got this ret" ret
+        if !isnothing(ret)
+            chat_id = message[:message][:chat][:id]
+            reply_to = message[:message][:message_id]
+            Bot.sendMessage(
+                Bot.TELEGRAM_BOT;
+                chat_id=chat_id,
+                text=ret,
+                reply_to_message_id=reply_to,
+            )
+        end
+    end
+end
+
+export run_server
 
 end
diff --git a/src/bot.jl b/src/bot.jl
new file mode 100644
index 0000000..c755867
--- /dev/null
+++ b/src/bot.jl
@@ -0,0 +1,36 @@
+module Bot
+
+export find_username
+
+using Logging
+using Telegram, Telegram.API
+import ..fetch_in_env
+
+const TG_TOKEN = fetch_in_env("TG_TOKEN_MARKOV")
+const CHAT_ID = parse(Int, fetch_in_env("TG_CHAT_ID"))
+
+const TELEGRAM_BOT = TelegramClient(TG_TOKEN; use_globally=false)
+
+"""
+    find_username(user_id)
+
+Ask Telegram for the member `user_id` of the chat `CHAT_ID` and return the
+`:username` of that member.
+"""
+function find_username(user_id)
+    @debug "Fetching user" user_id
+    member = getChatMember(TELEGRAM_BOT; user_id=user_id, chat_id=CHAT_ID)
+    return member[:user][:username]
+end
+
+"""
+    register_command(commands...)
+
+Declare `commands` as the command list of the bot.
+"""
+function register_command(commands...)
+    # The Telegram.jl API methods take their parameters as keyword arguments.
+    return setMyCommands(TELEGRAM_BOT; commands=collect(commands))
+end
+
+end
diff --git a/src/markov.jl b/src/markov.jl
new file mode 100644
index 0000000..ea3089d
--- /dev/null
+++ b/src/markov.jl
@@ -0,0 +1,280 @@
+module Markov
+
+import JSON
+using StatsBase
+using Logging
+import HTTP
+
+using ..Bot
+import ..initialize, ..make_sentence, ..list_usernames, ..fetch_in_env
+
+const JSON_FILE = fetch_in_env("INPUT_MARKOV")
+
+# A token is a word, or `nothing` for the position before or after a line.
+const Token = Union{String, Nothing}
+# Occurrence counts of the tokens seen next to a pair of tokens.
+const PairTable = Dict{Tuple{Token, Token}, Dict{Token, Float64}}
+# Occurrence counts of the tokens seen right after a single token.
+const SingleTable = Dict{Token, Dict{Token, Float64}}
+
+const USERIDS = Dict{String, Int64}()
+const ANALYSED_SINGLE = Dict{Int64, SingleTable}()
+const ANALYZED_FORWARD = Dict{Int64, PairTable}()
+const ANALYZED_BACKWARD = Dict{Int64, PairTable}()
+INITIALIZED = false
+
+"""
+    reset_module()
+
+Forget every registered user and analysed line, and mark the module as not
+initialised.
+"""
+function reset_module()
+    global INITIALIZED
+    foreach(empty!, (USERIDS, ANALYSED_SINGLE, ANALYZED_FORWARD, ANALYZED_BACKWARD))
+    INITIALIZED = false
+    return false
+end
+
+"""
+    list_usernames()
+
+Return the registered usernames.
+"""
+list_usernames() = collect(keys(USERIDS))
+
+"""
+    list_users()
+
+Return the registered user ids.
+"""
+list_users() = collect(values(USERIDS))
+
+"""
+    register_user(username, user_id)
+    register_user(user_id; default="plop")
+
+Record `user_id` under `username`.
+
+Without a username, it is looked up with `find_username`. When that lookup fails
+with an HTTP status error, `default` is used instead, or `"deleted"` when
+`default` is `nothing`.
+"""
+function register_user(username, user_id)
+    USERIDS[username] = user_id
+    return user_id
+end
+
+function register_user(user_id; default="plop")
+    username = try
+        find_username(user_id)
+    catch e
+        # Any other failure is unexpected: let it through with its backtrace.
+        e isa HTTP.ExceptionRequest.StatusError || rethrow()
+        fallback = something(default, "deleted")
+        @debug "Could not find username for id $user_id , falling back to default : $fallback"
+        fallback
+    end
+    return register_user(username, user_id)
+end
+
+"""
+    analyse_line(words)
+
+Break a line, given as its sequence of words, into transitions.
+
+Return `(backward, forward, single)`, three vectors of pairs:
+- `backward`: `(current, next) => previous`;
+- `forward`: `(previous, current) => next`;
+- `single`: `current => next`.
+
+`nothing` stands for the positions before the first word and after the last one.
+"""
+function analyse_line(words)
+    current = nothing
+    previous = nothing
+    result_forward = Pair{Tuple{Token, Token}, Token}[]
+    result_backward = Pair{Tuple{Token, Token}, Token}[(nothing, nothing) => nothing]
+    result_single = Pair{Token, Token}[]
+    for w in words
+        push!(result_forward, (previous, current) => w)
+        push!(result_backward, (current, w) => previous)
+        push!(result_single, current => w)
+        previous = current
+        current = w
+    end
+    push!(result_forward, (previous, current) => nothing)
+    push!(result_forward, (current, nothing) => nothing)
+    push!(result_backward, (current, nothing) => previous)
+    push!(result_single, current => nothing)
+    return result_backward, result_forward, result_single
+end
+
+# Record one more occurrence of `transition` (a `key => value` pair) in `table`.
+function count_transition!(table, transition)
+    key, value = transition
+    counts = get!(() -> Dict{Token, Float64}(), table, key)
+    counts[value] = get(counts, value, 0) + 1
+    return table
+end
+
+"""
+    analyse_all_lines(lines)
+
+Count the transitions found in `lines`, each line being split on whitespace.
+
+Return `(backward, forward, single)`, the count tables built from the matching
+outputs of `analyse_line`.
+"""
+function analyse_all_lines(lines)
+    probabilities_forward = PairTable()
+    probabilities_backward = PairTable()
+    probabilities_single = SingleTable()
+    for line in lines
+        analysed_backward, analysed_forward, analysed_single = analyse_line(split(line))
+        for transition in analysed_forward
+            count_transition!(probabilities_forward, transition)
+        end
+        for transition in analysed_backward
+            count_transition!(probabilities_backward, transition)
+        end
+        for transition in analysed_single
+            count_transition!(probabilities_single, transition)
+        end
+    end
+    return probabilities_backward, probabilities_forward, probabilities_single
+end
+
+"""
+    initialize(input_file=JSON_FILE; reset=false)
+
+Read the `"messages"` of the JSON file `input_file` and analyse the lines of each
+user found in them.
+
+Nothing is done when the module is already initialised, unless `reset` is `true`.
+"""
+function initialize(input_file=JSON_FILE; reset=false)
+    global INITIALIZED
+    reset && reset_module()
+    INITIALIZED && return nothing
+    messages = JSON.parsefile(input_file)["messages"]
+    user_lines = Dict{Int64, Vector{String}}()
+    for message in messages
+        haskey(message, "from_id") || continue
+        user_id = message["from_id"]
+        name = get(message, "from", nothing)
+        text = get(message, "text", nothing)
+        if text isa String && !isempty(text)
+            is_registered(user_id) || register_user(user_id; default=name)
+            push!(get!(() -> String[], user_lines, user_id), text)
+        end
+    end
+    users = list_users()
+    analysed = Vector{Tuple{PairTable, PairTable, SingleTable}}(undef, length(users))
+    # A `Dict` must not be written to from several threads at once: every iteration
+    # fills its own slot, and the shared tables are only updated afterwards.
+    Threads.@threads for i in eachindex(users)
+        analysed[i] = analyse_all_lines(get(user_lines, users[i], String[]))
+    end
+    for (user, tables) in zip(users, analysed)
+        ANALYZED_BACKWARD[user], ANALYZED_FORWARD[user], ANALYSED_SINGLE[user] = tables
+    end
+    INITIALIZED = true
+    return true
+end
+
+# Draw a key of `counts` at random, each key being weighted by its count.
+function sample_weighted(counts)
+    items = collect(keys(counts))
+    return sample(items, weights(collect(values(counts))))
+end
+
+"""
+    choose_next(user, current)
+    choose_next(user, previous, current)
+
+Draw one of the tokens recorded for `user` after `current` (or after `previous`
+followed by `current`), weighted by its number of occurrences.
+
+Throw a `KeyError` when `user` or the given tokens are unknown.
+"""
+choose_next(user, current) = sample_weighted(ANALYSED_SINGLE[user][current])
+function choose_next(user, previous, current)
+    return sample_weighted(ANALYZED_FORWARD[user][(previous, current)])
+end
+
+"""
+    choose_prev(user, current, next)
+
+Draw one of the tokens recorded for `user` before `current` followed by `next`,
+weighted by its number of occurrences.
+
+Throw a `KeyError` when `user` or the given tokens are unknown.
+"""
+function choose_prev(user, current, next)
+    return sample_weighted(ANALYZED_BACKWARD[user][(current, next)])
+end
+
+# Extend `word1 word2` to the right until the end of a line is drawn.
+function make_sentence_forward(user, word1, word2)
+    previous = word2
+    current = choose_next(user, word1, word2)
+    result = String[]
+    while !isnothing(current)
+        push!(result, current)
+        previous, current = current, choose_next(user, previous, current)
+    end
+    return join(result, " ")
+end
+
+# Extend `word1 word2` to the left until the start of a line is drawn.
+function make_sentence_backward(user, word1, word2)
+    next = word1
+    current = choose_prev(user, word1, word2)
+    result = String[]
+    while !isnothing(current)
+        pushfirst!(result, current)
+        current, next = choose_prev(user, current, next), current
+    end
+    return join(result, " ")
+end
+
+"""
+    make_sentence(user=nothing, word1=nothing, word2=nothing)
+
+Generate a sentence from the lines of `user`, built around `word1` and `word2`.
+
+A `nothing` user is replaced by a registered one picked at random. When only
+`word1` is given, `word2` is drawn among the tokens recorded after it. Return
+`"No luck, sorry."` when a lookup fails with a `KeyError`.
+"""
+function make_sentence(
+    user::Union{Nothing, String}=nothing, word1::Token=nothing, word2::Token=nothing
+)
+    try
+        username, userid = find_user(user)
+        if !isnothing(word1) && isnothing(word2)
+            word2 = choose_next(userid, word1)
+        end
+        @debug "chose start" word1 word2
+        start = make_sentence_backward(userid, word1, word2)
+        @debug "start done" start
+        finish = make_sentence_forward(userid, word1, word2)
+        @debug "finish done" finish
+        parts = ["<$username>", ":", start, word1, word2, finish]
+        return join(filter(!isnothing, parts), " ")
+    catch e
+        e isa KeyError || rethrow()
+        @debug "Key error" e
+        return "No luck, sorry."
+    end
+end
+
+# Tell whether `userid` is the id of a registered user.
+is_registered(userid) = userid in values(USERIDS)
+
+# Return the `(username, user id)` of `user`, or of a random user for `nothing`.
+find_user(user::String) = (user, USERIDS[user])
+find_user(::Nothing) = find_user(rand(list_usernames()))
+
+end
diff --git a/src/server.jl b/src/server.jl
new file mode 100644
index 0000000..8efe02c
--- /dev/null
+++ b/src/server.jl
@@ -0,0 +1,144 @@
+module Server
+
+using Markdown
+
+using Mux
+
+using ..Markov
+using ..Bot
+
+# Middlewares
+
+# Copy the id of the chat the message was posted in to the request parameters.
+function extract_chatid(app, req)
+    @debug "Looking for chan ID" req
+    req[:params][:chatid] = req[:message][:chat][:id]
+    return app(req)
+end
+
+# Let the request through only when it comes from the chat `Bot.CHAT_ID`.
+function ignore_if_not_allowed(app, req)
+    req[:params][:chatid] == Bot.CHAT_ID && return app(req)
+    @debug "Ignoring request"
+    return nothing
+end
+
+# Remove the leading "/" of a command name, if any.
+strip_slash(name) = startswith(name, "/") ? name[2:end] : name
+
+# Telegram gives entity offsets and lengths in UTF-16 code units while Julia indexes
+# strings by byte: slice the UTF-16 encoding of `text`, from unit `start` to unit
+# `stop` (clamped to the end of the text).
+function utf16_slice(text, start, stop)
+    units = transcode(UInt16, String(text))
+    return transcode(String, units[start:min(stop, length(units))])
+end
+
+"""
+    extract_command(command, parameters...; botname="")
+
+Build a middleware that looks for the bot command `command` in a message.
+
+The command matches when a `bot_command` entity of the message reads `/command` or
+`/command@botname`. The request then gets a `:command` entry holding the command
+`:name` and its `:parameters`: the words following the command, assigned in order
+to `parameters` (`nothing` for the parameters left without a word).
+"""
+function extract_command(command, parameters...; botname="")
+    name = strip_slash(command)
+    accepted = ("/" * name, "/" * name * "@" * botname)
+    function middleware(app, req)
+        message = req[:message]
+        haskey(message, :entities) || return app(req)
+        @debug "Looking for commands"
+        entities = [e for e in message[:entities] if e[:type] == "bot_command"]
+        @debug "There are commands" entities
+        index = findfirst(entities) do e
+            first_unit = e[:offset] + 1
+            utf16_slice(message[:text], first_unit, e[:offset] + e[:length]) in accepted
+        end
+        isnothing(index) && return app(req)
+        entity = entities[index]
+        after = entity[:offset] + entity[:length] + 1
+        words = split(utf16_slice(message[:text], after, typemax(Int)))
+        assigned = Dict{Symbol, Union{Nothing, String}}(p => nothing for p in parameters)
+        for (param, word) in zip(parameters, words)
+            @debug "Parameter attributed" param word
+            assigned[param] = word
+        end
+        req[:command] = Dict{Symbol, Any}(:name => name, :parameters => assigned)
+        return app(req)
+    end
+    return middleware
+end
+
+# Route the requests carrying the command `command` to `callback`.
+function branch_to_callback(callback, command)
+    # `extract_command` stores the name without its leading "/".
+    name = strip_slash(command)
+    function iscommand(req)
+        if haskey(req, :command)
+            @debug "branching" req[:command][:name] command
+            return req[:command][:name] == name
+        end
+        @debug "branching" req
+        return false
+    end
+    return branch(iscommand, callback)
+end
+
+"""
+    command(command, callback, parameters...; botname="")
+
+Build the middleware calling `callback` for the messages holding the bot command
+`command`, whose arguments are named after `parameters`.
+"""
+function command(command, callback, parameters...; botname="")
+    return stack(
+        extract_command(command, parameters...; botname=botname),
+        branch_to_callback(callback, command),
+    )
+end
+
+
+# Endpoints
+
+# Describe the available commands.
+function show_help(req)
+    return """
+    Hi ! I'm the Markovian bot. Here is what I can do :
+    * /help Prints this list of commands !
+    * /talk [username [word]] Creates a Markov chain for the given `user` (chosen at random if not set) containing `word` (chosen at random if not set)
+    * /list Lists registered usernames.
+
+    🐺
+    """
+end
+
+# List the registered usernames, sorted, one per line.
+function list_usernames(req)
+    users = join(" * " .* sort(Markov.list_usernames()), "\n")
+    return """
+    Here are the users I know of :
+    """ * users
+end
+
+# Generate a sentence for the `:user` and `:word` parameters of the command.
+function talk(req)
+    user = req[:command][:parameters][:user]
+    word = req[:command][:parameters][:word]
+    @debug "Time to talk" user word
+    return Markov.make_sentence(user, word)
+end
+
+authentication() = stack(extract_chatid, ignore_if_not_allowed)
+function commands()
+    return stack(
+        command("help", show_help),
+        command("list", list_usernames),
+        command("talk", talk, :user, :word),
+    )
+end
+application() = mux(stack(authentication(), commands()), _ -> nothing)
+
+end