From 3d8e5044d683989f490dc120a4fb045b88cabfa6 Mon Sep 17 00:00:00 2001
From: alpha-nerd-nomyo <alpha-nerd@nomyo.ai>
Date: Fri, 6 Mar 2026 16:00:19 +0100
Subject: [PATCH] doc: correction

---
 README.md      | 56 +++++++++++++++++++++++++++-----------------------
 pyproject.toml |  1 +
 2 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 90fa683..5cb3b71 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,6 @@
 
 **Async semantic caching for LLM API calls — reduce costs with one decorator.**
 
-[![PyPI](https://img.shields.io/pypi/v/semantic-llm-cache)](https://pypi.org/project/semantic-llm-cache/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
 [![Python](https://img.shields.io/pypi/pyversions/semantic-llm-cache)](https://pypi.org/project/semantic-llm-cache/)
 
@@ -21,16 +20,17 @@ LLM API calls are expensive and slow. In production applications, **20-40% of pr
 
 ## What changed from the original
 
+
 | Area                 | Original                  | This fork                                                           |
-| -------------------- | ------------------------- | ------------------------------------------------------------------- |
+| ---------------------- | --------------------------- | --------------------------------------------------------------------- |
 | Backends             | sync (`sqlite3`, `redis`) | async (`aiosqlite`, `redis.asyncio`)                                |
 | `@cache` decorator   | sync only                 | auto-detects async/sync                                             |
-| `EmbeddingCache`     | sync `encode()`           | adds `async aencode()` via `asyncio.to_thread`                      |
-| `CacheContext`       | sync only                 | supports both `with` and `async with`                               |
-| `CachedLLM`          | `chat()`                  | adds `achat()`                                                      |
+| `EmbeddingCache`     | sync `encode()`           | adds `async aencode()` via `asyncio.to_thread`                      |
+| `CacheContext`       | sync only                 | supports both `with` and `async with`                               |
+| `CachedLLM`          | `chat()`                  | adds `achat()`                                                      |
 | Utility functions    | sync                      | `clear_cache`, `invalidate`, `warm_cache`, `export_cache` all async |
-| `StorageBackend` ABC | sync abstract methods     | all abstract methods are `async def`                                |
-| Min Python           | 3.9                       | 3.10 (uses `X \| Y` union syntax)                                   |
+| `StorageBackend` ABC | sync abstract methods     | all abstract methods are `async def`                                |
+| Min Python           | 3.9                       | 3.10 (uses `X \| Y` union syntax)                                   |
 
 ## Installation
 
@@ -198,14 +198,15 @@ async def my_llm_function(prompt: str) -> str:
 
 ### Parameters
 
-| Parameter    | Type          | Default     | Description                                               |
-| ------------ | ------------- | ----------- | --------------------------------------------------------- |
-| `similarity` | `float`       | `1.0`       | Cosine similarity threshold (1.0 = exact, 0.9 = semantic) |
-| `ttl`        | `int \| None` | `3600`      | Time-to-live in seconds (None = never expires)            |
-| `backend`    | `Backend`     | `None`      | Storage backend (None = in-memory)                        |
-| `namespace`  | `str`         | `"default"` | Isolate different use cases                               |
-| `enabled`    | `bool`        | `True`      | Enable/disable caching                                    |
-| `key_func`   | `Callable`    | `None`      | Custom cache key function                                 |
+
+| Parameter    | Type         | Default     | Description                                               |
+| -------------- | -------------- | ------------- | ----------------------------------------------------------- |
+| `similarity` | `float`      | `1.0`       | Cosine similarity threshold (1.0 = exact, 0.9 = semantic) |
+| `ttl`        | `int \| None` | `3600`      | Time-to-live in seconds (None = never expires)            |
+| `backend`    | `Backend`    | `None`      | Storage backend (None = in-memory)                        |
+| `namespace`  | `str`        | `"default"` | Isolate different use cases                               |
+| `enabled`    | `bool`       | `True`      | Enable/disable caching                                    |
+| `key_func`   | `Callable`   | `None`      | Custom cache key function                                 |
 
 ### Utility Functions
 
@@ -221,19 +222,21 @@ from semantic_llm_cache.stats import (
 
 ## Backends
 
-| Backend         | Description                          | I/O                       |
-| --------------- | ------------------------------------ | ------------------------- |
+
+| Backend         | Description                          | I/O                        |
+| ----------------- | -------------------------------------- | ---------------------------- |
 | `MemoryBackend` | In-memory LRU (default)              | none — runs in event loop |
-| `SQLiteBackend` | Persistent, file-based (`aiosqlite`) | async non-blocking        |
-| `RedisBackend`  | Distributed (`redis.asyncio`)        | async non-blocking        |
+| `SQLiteBackend` | Persistent, file-based (`aiosqlite`) | async non-blocking         |
+| `RedisBackend`  | Distributed (`redis.asyncio`)        | async non-blocking         |
 
 ## Embedding Providers
 
-| Provider                      | Quality                      | Notes                       |
-| ----------------------------- | ---------------------------- | --------------------------- |
-| `DummyEmbeddingProvider`      | hash-only, no semantic match | zero deps, default          |
-| `SentenceTransformerProvider` | high (local model)           | requires `[semantic]` extra |
-| `OpenAIEmbeddingProvider`     | high (API)                   | requires `[openai]` extra   |
+
+| Provider                      | Quality                      | Notes                      |
+| ------------------------------- | ------------------------------ | ---------------------------- |
+| `DummyEmbeddingProvider`      | hash-only, no semantic match | zero deps, default         |
+| `SentenceTransformerProvider` | high (local model)           | requires `[semantic]` extra |
+| `OpenAIEmbeddingProvider`     | high (API)                   | requires `[openai]` extra   |
 
 Embedding inference is offloaded via `asyncio.to_thread` — model loading is blocking and should be done at application startup, not on first request.
 
@@ -250,8 +253,9 @@ embedding = await embedding_cache.aencode("my prompt")
 
 ## Performance
 
+
 | Metric                     | Value                                    |
-| -------------------------- | ---------------------------------------- |
+| ---------------------------- | ------------------------------------------ |
 | Cache hit latency          | <10ms                                    |
 | Embedding overhead on miss | ~50ms (sentence-transformers, offloaded) |
 | Typical hit rate           | 25-40%                                   |
@@ -275,5 +279,5 @@ MIT — see [LICENSE](LICENSE).
 
 ## Credits
 
-Original library by **Karthick Raja M** ([@karthyick](https://github.com/karthyick)).  
+Original library by **Karthick Raja M** ([@karthyick](https://github.com/karthyick)).
 Async conversion by this fork.
diff --git a/pyproject.toml b/pyproject.toml
index c0c3a32..4683d94 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ keywords = [
     "openai",
     "anthropic",
     "ollama",
+    "llama.cpp",
     "prompt",
     "optimization",
     "cost-reduction",