wikiFactor is a formula to quantify how “impactful” a wiki is. This page shows how to calculate it and displays my wikiFactor statistics.

The name wikiFactor, and the formula for it, are originally from a paper by Carl McBride.

In [2]:
# Page-view totals as (page name, view count) rows, listed from the most
# viewed page to the least viewed one.
data = [
    ("qotd_protocol", 5806),
    ("caffeine-halflife", 5707),
    ("rust_web_scraping", 4436),
    ("enabling_usb_midi_on_xiaomi_phones", 4433),
    ("hill_climbing", 2951),
    ("omegle_protocol", 2236),
    ("index", 2134),
    ("snpp_protocol", 1198),
    ("sshuttle", 1112),
    ("Gemini", 1048),
    ("Hellschreiber", 988),
    ("note_taking_systems", 870),
    ("carddav", 860),
]

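wikiFactor calculation

The wikiFactor is the largest n such that at least n pages have n × 1000 or more views each. The h-wikiFactor computed below is the same idea without the factor of 1000: the largest h such that at least h pages have h or more views each.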

In [3]:
# The wikiFactor is the number of top-ranked pages whose view count is at
# least rank * 1000 (rank starting at 1). Rank the rows by views first so the
# result does not depend on the order in which they were entered.
ranked = sorted(data, key=lambda row: row[1], reverse=True)

for i, row in enumerate(ranked):
    _, count = row
    if count < (i + 1) * 1000:
        # Page i + 1 is the first one to miss its threshold, so exactly i
        # pages qualified.
        print(f"Wikifactor is {i}")
        break
else:
    # No page missed its threshold (or there are no pages at all), so the
    # wikiFactor equals the number of pages.
    print(f"Wikifactor is {len(ranked)}")
Out:
Wikifactor is 4
In [4]:
def hwikifactor(data, h):
    """Report whether at least ``h`` rows have a view count of ``h`` or more.

    Args:
        data: Iterable of two-item rows; the second item is the view count.
        h: Candidate h-wikiFactor value to test.

    Returns:
        True when the number of rows with a count >= ``h`` is itself >= ``h``,
        otherwise False.
    """
    qualifying = sum(1 for _, views in data if views >= h)
    return qualifying >= h

# An h-wikiFactor can never exceed the number of pages, so start from that
# upper bound and walk downwards; the first candidate that passes is the
# largest one.
max_hwf = len(data)

for hwf in range(max_hwf, 0, -1):
    if hwikifactor(data, hwf):
        print(f"h-wikifactor is {hwf}")
        break
else:
    # No candidate passed (no pages, or no page with at least one view).
    print("h-wikifactor is 0")
Out:
h-wikifactor is 13

Statistics from my wiki

Let’s load the data.

In [5]:
import sqlite3
import datetime

# In-memory database with one row per successful wiki page view.
con = sqlite3.connect(":memory:")

_ = con.execute("create table pageviews (date text, page text)")
_ = con.execute("create index pageviews1 on pageviews(date)")

# Only URLs under /wiki (with or without the www. host) are counted.
WIKI_URL_PREFIXES = ("https://www.gkbrk.com/wiki", "https://gkbrk.com/wiki")

# Number of lines dropped because they could not be parsed. Inspect this after
# loading to see how much input was ignored.
skipped_lines = 0

# The log is '|'-separated. Field 1 is compared against "200" (presumably the
# HTTP status), field 2 is treated as an epoch timestamp in milliseconds and
# field 7 as the requested URL.
with open("/home/leo/external/tmp/gkbrk.com.log") as f:
    for line in f:
        parts = line.strip().split("|")
        try:
            if parts[1] != "200":
                continue
            url = parts[7]
            if not url.startswith(WIKI_URL_PREFIXES):
                continue
            # Timezone-aware replacement for the deprecated utcfromtimestamp();
            # the formatted UTC day is the same.
            viewed_at = datetime.datetime.fromtimestamp(
                int(parts[2]) / 1000, tz=datetime.timezone.utc
            )
        except (IndexError, ValueError, OverflowError, OSError):
            # Too few fields, a non-numeric timestamp, or a timestamp outside
            # the supported range: skip the line but keep count of it.
            skipped_lines += 1
            continue
        # Database errors are not parse errors, so they are left to propagate.
        _ = con.execute(
            "insert into pageviews (date, page) values (?, ?)",
            (viewed_at.strftime("%Y%m%d"), url),
        )
In [6]:
end_date = datetime.date.today()
start_date = datetime.date(2020, 3, 15)


def _wikifactor_of(counts):
    """Return the wikiFactor for view counts sorted in descending order.

    The result is the number of leading entries whose count is at least
    rank * 1000 (rank starting at 1). It is 0 for an empty list and
    len(counts) when every entry qualifies.
    """
    for i, count in enumerate(counts):
        if count < (i + 1) * 1000:
            return i
    return len(counts)


def _hwikifactor_of(counts):
    """Return the h-wikiFactor for view counts sorted in descending order.

    The result is the largest h such that at least h entries are >= h, or 0
    when no entry qualifies. Because the counts are sorted, "at least h
    entries are >= h" holds exactly when the entry at rank h is >= h, so a
    single pass is enough.
    """
    h = 0
    for rank, count in enumerate(counts, start=1):
        if count < rank:
            break
        h = rank
    return h


# One value per day from start_date up to (but not including) today, so that
# index k in either series always means "k days after start_date".
hwfplot = []
wfplot = []

day = start_date
while day < end_date:
    # Cumulative views per page up to and including this day. Dates are stored
    # as YYYYMMDD text, so string comparison orders them chronologically.
    cursor = con.execute(
        'select count(*) from pageviews where date <= ? group by page',
        (day.strftime("%Y%m%d"),),
    )
    view_counts = sorted((row[0] for row in cursor), reverse=True)

    wfplot.append(_wikifactor_of(view_counts))
    hwfplot.append(_hwikifactor_of(view_counts))

    day += datetime.timedelta(days=1)

# The two metrics live on very different scales, so each gets its own y-axis.
fig, hwf_ax = plt.subplots()
wf_ax = hwf_ax.twinx()

(hwf_line,) = hwf_ax.plot(hwfplot, label="h-wikiFactor")
(wf_line,) = wf_ax.plot(wfplot, color='orange', label="wikiFactor")

_ = hwf_ax.set_xlabel(f"Days since {start_date.isoformat()}")
_ = hwf_ax.set_ylabel("h-wikiFactor")
_ = wf_ax.set_ylabel("wikiFactor")

# The lines belong to different axes, so pass both handles explicitly to get
# one combined legend.
_ = wf_ax.legend(handles=[wf_line, hwf_line])
Out:
<Figure size 768x576 with 2 Axes>