๐ promote.py detalhado
50 linhas Python. Todo o codigo comentado.
๐ promote.py
#!/usr/bin/env python3
# Le knowledge/, detecta padroes repetidos, propoe regras.
import sys, re
from pathlib import Path
from datetime import datetime
from collections import Counter
def normalize(text):
    """Build a canonical bucket key: lowercase, drop punctuation, cap at 200 chars."""
    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    return cleaned[:200]
def find_patterns(knowledge_dir, threshold=3):
    """Group notes whose first paragraph normalizes to the same key.

    Scans knowledge_dir recursively for *.md files (skipping promotions/
    so previously generated proposals are never re-counted) and returns
    {normalized_first_paragraph: [paths]} for every group with at least
    *threshold* members.
    """
    buckets = {}
    for md in knowledge_dir.rglob('*.md'):
        if 'promotions' in md.parts:
            continue
        # errors='ignore' keeps one badly-encoded note from aborting the scan.
        first_para = md.read_text(encoding='utf-8', errors='ignore').split('\n\n', 1)[0]
        norm = normalize(first_para)
        # BUG FIX: paragraphs that normalize to '' (only punctuation/emoji)
        # used to share one bucket and could surface as a bogus "pattern".
        if not norm:
            continue
        buckets.setdefault(norm, []).append(md)
    return {k: v for k, v in buckets.items() if len(v) >= threshold}
def write_proposal(patterns, out_path):
    """Render detected patterns as a human-reviewable Markdown proposals file.

    patterns: mapping {normalized_text: [source paths]} as produced by
    find_patterns. Writes the document to *out_path* and returns the
    number of proposals written.
    """
    lines = [f'# Promocoes pendentes ({datetime.now()})\n']
    for i, (norm, paths) in enumerate(patterns.items(), 1):
        lines.append(f'## Proposta {i} ({len(paths)} ocorrencias)')
        lines.append(f'Padrao: {norm[:150]}')
        lines.append('Origens:')
        for p in paths:
            lines.append(f'- {p}')
        lines.append('Regra sugerida (edite antes de aprovar):')
        lines.append('"Sempre [acao] quando [condicao]."\n')
    # BUG FIX: main() targets knowledge/promotions/pending.md, but the
    # promotions/ directory may not exist yet -- create it first.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text('\n'.join(lines), encoding='utf-8')
    return len(patterns)
if __name__ == '__main__':
    # CLI: promote.py <knowledge_dir>
    knowledge = Path(sys.argv[1]).expanduser()
    count = write_proposal(find_patterns(knowledge),
                           knowledge / 'promotions' / 'pending.md')
    print(f'Padroes detectados: {count}')

💡 Substitua regex por embeddings
Versao atual usa normalizacao + hash. Upgrade natural: fastembed + clustering semantico. Melhor precisao.
๐ decay.py detalhado
Aplica curva de Ebbinghaus com salience. Roda cron semanal.
๐ decay.py nucleo
#!/usr/bin/env python3
import sys, shutil
from pathlib import Path
from datetime import datetime
# Decay thresholds in days per aggressiveness profile, as the tuple
# (compress-to-summary, reduce-to-one-liner, move-to-archive).
THRESHOLDS = {
'conservador': (14, 60, 180),
'moderado': (7, 30, 90),
'agressivo': (3, 14, 45),
}
def age_days(meta, now):
    """Whole days elapsed since the note's 'created' ISO timestamp.

    Falls back to *now* itself (age 0) when 'created' is missing.
    """
    created_iso = meta.get('created', now.isoformat())
    return (now - datetime.fromisoformat(created_iso)).days
def salience(meta):
    """Reinforcement multiplier: 1.0 baseline plus 0.2 per recorded access."""
    accesses = int(meta.get('access_count', 0))
    return 1.0 + 0.2 * accesses
def apply_decay(md, mode='moderado'):
    """Apply one Ebbinghaus-style decay step to a single note.

    Returns 'archived', 'oneliner', 'summary' or 'kept'. Relies on
    helpers from the full lab version (labs/decay-py/decay.py):
    parse_frontmatter, archive, render, one_liner, compress.
    """
    summary_at, oneliner_at, archive_at = THRESHOLDS[mode]
    meta, body = parse_frontmatter(md.read_text())
    # Frequently accessed notes age more slowly (salience >= 1.0).
    effective = age_days(meta, datetime.now()) / salience(meta)
    if effective >= archive_at:
        archive(md)
        return 'archived'
    if effective >= oneliner_at:
        md.write_text(render(meta, one_liner(body)))
        return 'oneliner'
    if effective >= summary_at:
        md.write_text(render(meta, compress(body, 0.5)))
        return 'summary'
    return 'kept'
if __name__ == '__main__':
    # CLI: decay.py <knowledge_dir> [conservador|moderado|agressivo]
    knowledge = Path(sys.argv[1]).expanduser()
    # BUG FIX: the documented usage passes a mode as the second argument
    # (e.g. `decay.py ~/.memory/knowledge moderado`), but it was ignored
    # and the default always applied.
    mode = sys.argv[2] if len(sys.argv) > 2 else 'moderado'
    counts = {'kept': 0, 'summary': 0, 'oneliner': 0, 'archived': 0}
    for md in knowledge.rglob('*.md'):
        if 'archive' in md.parts:
            continue  # never decay notes already archived
        counts[apply_decay(md, mode)] += 1
    for k, v in counts.items():
        print(f'{k}: {v}')

💡 Lab completo
labs/decay-py/decay.py tem implementacao completa com parse_frontmatter, render, one_liner, compress. Use como base.
๐ search.py: multi-sinal
Busca hibrida em SQLite + fastembed. CLI util e chamavel por UserPromptSubmit.
๐ search.py
#!/usr/bin/env python3
import sqlite3, sqlite_vec
from fastembed import TextEmbedding
class Search:
    """Hybrid search over the memory DB: FTS5 keywords + sqlite-vec KNN,
    merged with a 50/50 weighted score."""

    def __init__(self, db_path):
        self.db = sqlite3.connect(db_path)
        # sqlite-vec ships as a loadable extension; it must be loaded on
        # every new connection before vec_memories can be queried.
        self.db.enable_load_extension(True)
        sqlite_vec.load(self.db)
        self.db.enable_load_extension(False)  # re-lock after loading
        self.embed = TextEmbedding()

    def run(self, query, top_k=3):
        """Return up to *top_k* memory bodies ranked by the combined score."""
        # Signal 1: keyword match via FTS5.
        # NOTE(review): FTS5 `rank` is negative bm25 (more negative = better
        # match), so 0.5 / (rank + 1) may not weight hits as intended --
        # confirm the scoring against real data.
        kw_rows = self.db.execute('''
            SELECT id, rank FROM memories_fts
            WHERE memories_fts MATCH ? LIMIT 10''', (query,)).fetchall()
        # Signal 2: semantic similarity (smaller distance = closer).
        q_emb = list(self.embed.embed([query]))[0].tolist()
        # BUG FIX: sqlite3 cannot bind a Python list as a statement
        # parameter; sqlite-vec accepts the probe vector as a JSON array
        # string, so serialize it first.
        import json
        sem_rows = self.db.execute('''
            SELECT rowid, distance FROM vec_memories
            WHERE embedding MATCH ? ORDER BY distance LIMIT 10''',
            (json.dumps(q_emb),)).fetchall()
        # Merge + rerank: each signal contributes at most 0.5 to a score.
        # NOTE(review): assumes memories_fts.id and vec_memories.rowid refer
        # to the same memory ids -- verify against the indexing script.
        scores = {}
        for id_, rank in kw_rows:
            scores[id_] = scores.get(id_, 0) + 0.5 / (rank + 1)
        for id_, d in sem_rows:
            scores[id_] = scores.get(id_, 0) + 0.5 * (1 - d)
        top_ids = sorted(scores, key=scores.get, reverse=True)[:top_k]
        # _fetch_bodies is provided by the full lab implementation.
        return self._fetch_bodies(top_ids)
if __name__ == '__main__':
    import sys
    import os
    # BUG FIX: sqlite3.connect() does not expand '~', so the literal path
    # '~/.memory/memory.db' would fail to open -- expand it explicitly.
    results = Search(os.path.expanduser('~/.memory/memory.db')).run(sys.argv[1])
    for r in results:
        print(r)

🧩 hook_runner.sh: orquestrador
Wrapper bash que isola Claude Code do seu codigo Python.
๐ hook_runner.sh
#!/usr/bin/env bash
# Orchestrator invoked from settings.json: routes each Claude Code hook
# event to its dedicated (and individually testable) Python handler.
# IMPROVEMENT: -u catches unset variables and -o pipefail surfaces
# failures inside pipelines; ${1:-} keeps a missing argument from
# aborting under -u and lets it fall through to the default case.
set -euo pipefail

EVENT="${1:-}"
INPUT=$(cat)   # hook payload arrives on stdin as JSON

case "$EVENT" in
  session_start)
    python3 ~/.memory/scripts/session_start.py "$INPUT"
    ;;
  pre_compact)
    python3 ~/.memory/scripts/pre_compact.py "$INPUT"
    ;;
  user_prompt)
    python3 ~/.memory/scripts/user_prompt.py "$INPUT"
    ;;
  session_end)
    # Fire-and-forget: summarization may be slow, so detach it with
    # nohup and log to /tmp instead of blocking session shutdown.
    nohup python3 ~/.memory/scripts/session_end.py "$INPUT" \
      > /tmp/memory_session_end.log 2>&1 &
    ;;
  *)
    # Unknown event: emit an empty JSON object so Claude Code ignores it.
    echo "{}"
    ;;
esac
# Em settings.json:
# "command": "$HOME/.memory/hooks/hook_runner.sh session_start"

💡 Abstracao vale
Tem 1 bash simples no Claude Code; 1 Python testado por cada hook. Trocar de linguagem do Python nao afeta o Claude.
๐งช Testes unitarios
Sistema de memoria sem teste envelhece mal. Pytest minimo salva em 6 meses.
๐ tests/test_memory.py
import pytest
from pathlib import Path
from scripts.promote import find_patterns, normalize
from scripts.decay import salience, age_days
def test_normalize():
    # Punctuation is stripped and case is folded.
    assert normalize('Hello World!') == 'hello world'
def test_salience_baseline():
    # Zero recorded accesses leaves the multiplier at its 1.0 baseline.
    meta = {'access_count': '0'}
    assert salience(meta) == 1.0
def test_salience_with_access():
    # 1.0 + 5 * 0.2 == 2.0 -- each access adds 0.2.
    meta = {'access_count': '5'}
    assert salience(meta) == 2.0
def test_find_patterns_below_threshold(tmp_path):
    # Two notes with different content: no bucket reaches the threshold.
    (tmp_path / 'a.md').write_text('Content X')
    (tmp_path / 'b.md').write_text('Content Y')
    assert find_patterns(tmp_path, threshold=3) == {}
def test_find_patterns_meets_threshold(tmp_path):
    # Three identical first paragraphs -> exactly one proposed pattern.
    for name in ('a.md', 'b.md', 'c.md'):
        (tmp_path / name).write_text('Identical content')
    assert len(find_patterns(tmp_path, threshold=3)) == 1
# Rodar: pytest tests/

💡 CI basico
GitHub Actions rodando pytest a cada commit. Evita regressao quando voce refatora em 3 meses.
๐ฆ Packaging: tudo em labs/
Curso empacotou scripts funcionais em labs/. Copy, edite, rode.
๐ Estrutura de labs/
labs/
├── palavra-codigo/ (experimento A/B)
├── prime-md/ (template)
├── context-md/ (template)
├── promote-py/
│   └── promote.py (100% funcional)
├── decay-py/
│   └── decay.py (100% funcional)
└── gemini-summary/
    └── summarize_session.py (100% funcional)

# Uso direto
$ python3 labs/promote-py/promote.py ~/.memory/knowledge
$ python3 labs/decay-py/decay.py ~/.memory/knowledge moderado
$ cat transcript.jsonl | python3 labs/gemini-summary/summarize_session.py

💡 Scripts vivem no labs
Ao inves de reescrever, simlink. ln -s labs/promote-py/promote.py ~/.memory/scripts/. Atualiza automaticamente quando curso atualiza.
๐ Resumo do Modulo
Proximo:
6.4 โ SessionEnd + Gemini Flash