eng
competition

Text Practice Mode

This is who I am?

created Sep 26th, 16:08 by Anshul Kumar


1


Rating

414 words
4 completed
00:00
# Typing Practice ML Model for 10fastfingers
# ------------------------------------------
# This script learns which words you are most likely to mistype
# and recommends practice words for you.
#
# How to use:
# 1. Run it once (it uses simulated logs).
# 2. Later replace the synthetic logs with your real typing logs.
# 3. The model will adapt and generate new practice words.
 
import random, math, csv
from collections import Counter
 
# scikit-learn is optional: SKLEARN_AVAILABLE records whether the import
# succeeded so the rest of the script can choose a model accordingly.
try:
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.linear_model import LogisticRegression
except ImportError:
    # Only a missing or unimportable package should trigger the fallback; a
    # bare ``except:`` would also swallow KeyboardInterrupt and SystemExit.
    SKLEARN_AVAILABLE = False
else:
    SKLEARN_AVAILABLE = True
 
# --------------------------
# 1. Simulate Typing Logs
# --------------------------
# The 26 lowercase ASCII letters, in alphabetical order.
letters = [chr(code) for code in range(ord("a"), ord("z") + 1)]
# Letters and two-letter sequences that the simulator gives a higher error
# probability (pretend these are harder to type).
hard_letters = {"t", "r", "y", "u", "i", "o"}
hard_bigrams = {"th", "tr", "ri", "yu", "oi", "ht"}
 
def simulate_log_entry(word):
    """Simulate typing ``word`` and return one log record per character.

    Each record is a dict with the keys ``prev_char`` (lower-cased previous
    character, or ``'<s>'`` for the first one), ``char`` (lower-cased),
    ``position`` (index divided by ``max(1, len(word) - 1)``), ``is_upper``
    (0/1), ``elapsed_ms`` (50 for the first character, +30 per following
    character) and ``error`` (0/1, drawn with ``random.random()``).
    """
    last_index = len(word) - 1
    span = max(1, last_index)
    records = []
    for idx, ch in enumerate(word):
        prev = '<s>' if idx == 0 else word[idx - 1]

        # Build the error probability step by step, in a fixed order, so the
        # floating-point result is reproducible for a given character.
        error_prob = 0.01
        if ch in hard_letters:
            error_prob += 0.12
        if (prev + ch) in hard_bigrams:
            error_prob += 0.10
        if idx in (0, last_index):
            # First and last characters are slightly less error-prone.
            error_prob -= 0.005
        upper = ch.isupper()
        if upper:
            error_prob += 0.05

        # Exactly one random draw per character.
        mistyped = int(random.random() < error_prob)

        records.append({
            'prev_char': prev.lower(),
            'char': ch.lower(),
            'position': idx / span,
            'is_upper': int(upper),
            'elapsed_ms': 50 + 30 * idx,
            'error': mistyped,
        })
    return records
 
# Vocabulary used both to generate the synthetic logs and to rank practice
# words.
sample_words = (
    "the and you that practice keyboard accuracy mistake "
    "beautiful synchronize probability algorithm statistics "
    "performance improve predict tough rhythm"
).split()
 
# Synthetic training log: 1000 randomly chosen sample words, flattened into
# one record per typed character.
logs = []
for _ in range(1000):
    logs += simulate_log_entry(random.choice(sample_words))
 
# --------------------------
# 2. Feature Extraction
# --------------------------
def extract_features(entry):
    """Turn one log record into a dict of one-hot style features.

    Reads ``entry['char']``, ``entry['prev_char']`` and ``entry['position']``
    and returns a dict mapping four feature names (character, previous
    character, bigram, position bucket) to 1. The position bucket is
    ``int(position * 4)``.
    """
    ch = entry['char']
    prev = entry['prev_char']
    bucket = int(entry['position'] * 4)
    feature_names = (
        'char=' + ch,
        'prev_char=' + prev,
        'bigram=' + prev + ch,
        'pos_bucket=' + str(bucket),
    )
    return dict.fromkeys(feature_names, 1)
 
# Feature dicts and 0/1 error labels, aligned index-for-index with ``logs``.
X_dicts = list(map(extract_features, logs))
y = [record['error'] for record in logs]
 
# --------------------------
# 3. Train Model
# --------------------------
if not SKLEARN_AVAILABLE:
    # Fallback without scikit-learn: per-feature error frequencies.
    # counts[name] = records containing the feature,
    # errors[name] = those of them that were labelled as an error.
    counts, errors = Counter(), Counter()
    for feats, label in zip(X_dicts, y):
        # Pass the keys explicitly so every feature is counted once per
        # record regardless of the value stored under it.
        counts.update(feats.keys())
        if label:
            errors.update(feats.keys())

    def predict_error_prob(entry):
        """Return the mean smoothed error rate over the entry's features.

        A feature seen in training contributes (errors + 1) / (count + 2);
        a feature never seen contributes a fixed 0.02.
        """
        rates = [
            (errors[name] + 1) / (counts[name] + 2) if counts[name] > 0 else 0.02
            for name in extract_features(entry)
        ]
        return sum(rates) / len(rates)

    print("Model: Frequency baseline")
else:
    # Vectorize the feature dicts into a dense matrix and fit a logistic
    # regression on the 0/1 error labels.
    vec = DictVectorizer(sparse=False)
    X = vec.fit_transform(X_dicts)
    clf = LogisticRegression(
        max_iter=1000,
        class_weight='balanced',
        solver='liblinear',
    )
    clf.fit(X, y)

    def predict_error_prob(entry):
        """Return column 1 of ``clf.predict_proba`` for ``entry`` as a float.

        NOTE(review): column 1 is the error=1 class provided both labels
        occur in ``y`` - confirm for real logs.
        """
        row = vec.transform([extract_features(entry)])
        return float(clf.predict_proba(row)[0, 1])

    print("Model: LogisticRegression")
 
# --------------------------
# 4. Score Words
# --------------------------
def word_score(word):
    """Return the sum of predicted per-character error probabilities.

    For every character a record shaped like the simulated log entries is
    built (without the ``error`` key, and with ``elapsed_ms`` fixed at 50)
    and passed to ``predict_error_prob``. An empty word scores 0.
    """
    span = max(1, len(word) - 1)
    entries = (
        {
            'prev_char': ('<s>' if idx == 0 else word[idx - 1]).lower(),
            'char': ch.lower(),
            'position': idx / span,
            'is_upper': int(ch.isupper()),
            'elapsed_ms': 50,
        }
        for idx, ch in enumerate(word)
    )
    return sum(map(predict_error_prob, entries))
 
# (word, score) pairs for every sample word, highest score first; words with
# equal scores keep their order from ``sample_words``.
scored = sorted(
    ((word, word_score(word)) for word in sample_words),
    key=lambda pair: -pair[1],
)
 
# --------------------------
# 5. Show Results
# --------------------------
# Print the ranking, one "word score" line per entry.
print("\nTop Practice Words:\n")
for word, score in scored:
    print(f"{word:12s} {score:.3f}")

# Save to file: one tab-separated "word<TAB>score" line per entry.
with open("practice_words.txt","w") as out:
    out.writelines(f"{word}\t{score:.6f}\n" for word, score in scored)
print("\nSaved to practice_words.txt")

saving score / loading statistics ...