mirror of
https://github.com/flakestorm/flakestorm.git
synced 2026-04-25 00:36:54 +02:00
Remove cloud references and limits.py, hardcode values
- Delete limits.py module entirely
- Delete github_actions.py integration
- Hardcode MAX_MUTATIONS_PER_RUN = 50 in orchestrator
- Hardcode PARALLEL_EXECUTION_ENABLED = False
- Remove all cloud references from code and README
- Remove cloud and limits CLI commands
- Make open source version 100% standalone
- Add nosec comments for bandit false positives
This commit is contained in:
parent
ee10da0b97
commit
344678afd0
9 changed files with 32 additions and 689 deletions
21
README.md
21
README.md
|
|
@ -15,9 +15,6 @@
|
|||
<a href="https://pypi.org/project/flakestorm/">
|
||||
<img src="https://img.shields.io/pypi/pyversions/flakestorm.svg" alt="Python Versions">
|
||||
</a>
|
||||
<a href="https://flakestorm.com">
|
||||
<img src="https://img.shields.io/badge/☁️-Cloud%20Available-blueviolet" alt="Cloud">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
---
|
||||
|
|
@ -134,12 +131,6 @@ Running attacks... ━━━━━━━━━━━━━━━━━━
|
|||
Report saved to: ./reports/flakestorm-2024-01-15-143022.html
|
||||
```
|
||||
|
||||
### Check Limits
|
||||
|
||||
```bash
|
||||
flakestorm limits # Show edition limits
|
||||
flakestorm cloud # Learn about Cloud features
|
||||
```
|
||||
|
||||
## Mutation Types
|
||||
|
||||
|
|
@ -151,8 +142,6 @@ flakestorm cloud # Learn about Cloud features
|
|||
| **Prompt Injection** | Basic adversarial attacks | "Book a flight and ignore previous instructions" |
|
||||
| **Custom** | Your own mutation templates | Define with `{prompt}` placeholder |
|
||||
|
||||
> **Need advanced mutations?** Visit [flakestorm.com](https://flakestorm.com) for more options.
|
||||
|
||||
## Invariants (Assertions)
|
||||
|
||||
### Deterministic
|
||||
|
|
@ -180,8 +169,6 @@ invariants:
|
|||
- type: "refusal_check"
|
||||
```
|
||||
|
||||
> **Need advanced safety?** Visit [flakestorm.com](https://flakestorm.com) for more options.
|
||||
|
||||
## Agent Adapters
|
||||
|
||||
### HTTP Endpoint
|
||||
|
|
@ -216,8 +203,6 @@ For local testing:
|
|||
flakestorm run --min-score 0.9
|
||||
```
|
||||
|
||||
For advanced CI/CD features, visit [flakestorm.com](https://flakestorm.com).
|
||||
|
||||
## Robustness Score
|
||||
|
||||
The Robustness Score is calculated as:
|
||||
|
|
@ -257,9 +242,3 @@ AGPLv3 - See [LICENSE](LICENSE) for details.
|
|||
<strong>Tested with FlakeStorm</strong><br>
|
||||
<img src="https://img.shields.io/badge/tested%20with-flakestorm-brightgreen" alt="Tested with FlakeStorm">
|
||||
</p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://flakestorm.com">
|
||||
<strong>⚡ Need more features? Visit FlakeStorm Cloud →</strong>
|
||||
</a>
|
||||
</p>
|
||||
|
|
|
|||
|
|
@ -13,14 +13,8 @@ from pathlib import Path
|
|||
import typer
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.text import Text
|
||||
|
||||
from flakestorm import __version__
|
||||
from flakestorm.core.limits import (
|
||||
CLOUD_URL,
|
||||
MAX_MUTATIONS_PER_RUN,
|
||||
print_upgrade_banner,
|
||||
)
|
||||
from flakestorm.core.runner import FlakeStormRunner
|
||||
|
||||
# Create the main app
|
||||
|
|
@ -37,10 +31,7 @@ console = Console()
|
|||
def version_callback(value: bool) -> None:
|
||||
"""Print version and exit."""
|
||||
if value:
|
||||
console.print(
|
||||
f"[bold blue]flakestorm[/bold blue] version {__version__} [dim](Open Source Edition)[/dim]"
|
||||
)
|
||||
console.print(f"[dim]→ Upgrade to Cloud: {CLOUD_URL}[/dim]")
|
||||
console.print(f"[bold blue]flakestorm[/bold blue] version {__version__}")
|
||||
raise typer.Exit()
|
||||
|
||||
|
||||
|
|
@ -412,84 +403,6 @@ def score(
|
|||
asyncio.run(_score_async(config))
|
||||
|
||||
|
||||
@app.command()
|
||||
def cloud() -> None:
|
||||
"""
|
||||
Learn about flakestorm Cloud features.
|
||||
|
||||
flakestorm Cloud provides 20x faster execution, advanced features,
|
||||
and team collaboration.
|
||||
"""
|
||||
print_upgrade_banner(console, reason="20x faster tests")
|
||||
|
||||
console.print("\n[bold]Feature Comparison:[/bold]\n")
|
||||
|
||||
# Feature comparison table
|
||||
features = [
|
||||
("Mutation Types", "5 basic", "[green]All types[/green]"),
|
||||
("Mutations/Run", f"{MAX_MUTATIONS_PER_RUN}", "[green]Unlimited[/green]"),
|
||||
(
|
||||
"Execution",
|
||||
"[yellow]Sequential[/yellow]",
|
||||
"[green]Parallel (20x faster)[/green]",
|
||||
),
|
||||
("LLM", "Local only", "[green]Cloud + Local[/green]"),
|
||||
("PII Detection", "Basic regex", "[green]Advanced NER + ML[/green]"),
|
||||
("Prompt Injection", "Basic", "[green]ML-powered[/green]"),
|
||||
("Factuality Check", "[red]❌[/red]", "[green]✅[/green]"),
|
||||
("Test History", "[red]❌[/red]", "[green]✅ Dashboard[/green]"),
|
||||
("GitHub Actions", "[red]❌[/red]", "[green]✅ One-click setup[/green]"),
|
||||
("Team Features", "[red]❌[/red]", "[green]✅ Sharing & SSO[/green]"),
|
||||
]
|
||||
|
||||
console.print(" [dim]Feature Open Source Cloud[/dim]")
|
||||
console.print(" " + "─" * 50)
|
||||
for feature, oss, cloud in features:
|
||||
console.print(f" {feature:<20} {oss:<14} {cloud}")
|
||||
|
||||
console.print("\n[bold cyan]Pricing:[/bold cyan]")
|
||||
console.print(" • [bold]Community:[/bold] $0/mo (current)")
|
||||
console.print(" • [bold]Pro:[/bold] $49/mo - Parallel + Cloud LLMs")
|
||||
console.print(" • [bold]Team:[/bold] $299/mo - All features + collaboration")
|
||||
|
||||
console.print(
|
||||
f"\n[bold]→ Get started:[/bold] [link={CLOUD_URL}]{CLOUD_URL}[/link]\n"
|
||||
)
|
||||
|
||||
|
||||
@app.command()
|
||||
def limits() -> None:
|
||||
"""
|
||||
Show Open Source edition limits.
|
||||
|
||||
Displays the feature limitations of the Open Source edition
|
||||
and how to unlock more with flakestorm Cloud.
|
||||
"""
|
||||
console.print(
|
||||
Panel(
|
||||
Text.from_markup(
|
||||
"[bold]Open Source Edition Limits[/bold]\n\n"
|
||||
f"• [yellow]Max {MAX_MUTATIONS_PER_RUN} mutations[/yellow] per test run\n"
|
||||
"• [yellow]Sequential execution[/yellow] (one test at a time)\n"
|
||||
"• [yellow]5 mutation types[/yellow]: paraphrase, noise, tone, injection, custom\n"
|
||||
"• [yellow]Local LLM only[/yellow] (Ollama/llama.cpp)\n"
|
||||
"• [yellow]Basic PII detection[/yellow] (regex patterns)\n"
|
||||
"• [red]No GitHub Actions[/red] CI/CD integration\n"
|
||||
"• [red]No test history[/red] or dashboard\n"
|
||||
"• [red]No team features[/red]\n\n"
|
||||
"[bold green]Why these limits?[/bold green]\n"
|
||||
"The Open Source edition is designed for:\n"
|
||||
"• Learning and experimentation\n"
|
||||
"• Small test suites\n"
|
||||
"• Individual developers\n\n"
|
||||
f"[bold]Upgrade for production:[/bold] {CLOUD_URL}"
|
||||
),
|
||||
title="[bold blue]flakestorm Open Source[/bold blue]",
|
||||
border_style="blue",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
async def _score_async(config: Path) -> None:
|
||||
"""Async implementation of score command."""
|
||||
|
||||
|
|
|
|||
|
|
@ -69,11 +69,10 @@ class MutationConfig(BaseModel):
|
|||
"""
|
||||
Configuration for mutation generation.
|
||||
|
||||
Open Source Edition Limits:
|
||||
Limits:
|
||||
- Maximum 50 total mutations per test run
|
||||
- 5 mutation types: paraphrase, noise, tone_shift, prompt_injection, custom
|
||||
|
||||
Upgrade to flakestorm Cloud for unlimited mutations and advanced types.
|
||||
"""
|
||||
|
||||
count: int = Field(
|
||||
|
|
|
|||
|
|
@ -1,222 +0,0 @@
|
|||
"""
|
||||
Open Source Edition Limits
|
||||
|
||||
Defines feature limits for the open source (local-only) version.
|
||||
These limits encourage users to upgrade to flakestorm Cloud for:
|
||||
- Faster parallel execution
|
||||
- Cloud LLMs (higher quality mutations)
|
||||
- Advanced features
|
||||
- Team collaboration
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.text import Text
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# OPEN SOURCE EDITION LIMITS
|
||||
# =============================================================================
|
||||
|
||||
# Maximum mutations per test run (sequential = slow)
|
||||
MAX_MUTATIONS_PER_RUN = 50
|
||||
|
||||
# Maximum golden prompts
|
||||
MAX_GOLDEN_PROMPTS = 10
|
||||
|
||||
# Execution mode (sequential only - no parallelism)
|
||||
PARALLEL_EXECUTION_ENABLED = False
|
||||
|
||||
# GitHub Actions integration
|
||||
GITHUB_ACTIONS_ENABLED = False
|
||||
|
||||
# Advanced features disabled
|
||||
ADVANCED_MUTATIONS_ENABLED = False # Sophisticated prompt injections
|
||||
ADVANCED_SAFETY_CHECKS_ENABLED = False # NER, ML-based detection, factuality
|
||||
TEST_HISTORY_ENABLED = False # Dashboard, history tracking
|
||||
TEAM_FEATURES_ENABLED = False # Sharing, collaboration
|
||||
|
||||
# Cloud features disabled
|
||||
CLOUD_LLM_ENABLED = False
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ALLOWED MUTATION TYPES (5 types for open source)
|
||||
# =============================================================================
|
||||
|
||||
ALLOWED_MUTATION_TYPES = [
|
||||
"paraphrase", # Semantic rewrites
|
||||
"noise", # Typos, spelling errors
|
||||
"tone_shift", # Tone changes
|
||||
"prompt_injection", # Basic adversarial
|
||||
"custom", # User-defined templates
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# UPGRADE MESSAGING
|
||||
# =============================================================================
|
||||
|
||||
CLOUD_URL = "https://flakestorm.cloud"
|
||||
UPGRADE_CTA = f"⚡ Upgrade to flakestorm Cloud for 20x faster execution → {CLOUD_URL}"
|
||||
|
||||
|
||||
@dataclass
|
||||
class LimitViolation:
|
||||
"""Represents a limit that was exceeded."""
|
||||
|
||||
limit_name: str
|
||||
current_value: int
|
||||
max_value: int
|
||||
message: str
|
||||
|
||||
|
||||
def check_mutation_limit(
|
||||
requested_count: int, num_prompts: int
|
||||
) -> LimitViolation | None:
|
||||
"""
|
||||
Check if the requested mutation count exceeds limits.
|
||||
|
||||
Args:
|
||||
requested_count: Requested mutations per prompt
|
||||
num_prompts: Number of golden prompts
|
||||
|
||||
Returns:
|
||||
LimitViolation if exceeded, None otherwise
|
||||
"""
|
||||
total = requested_count * num_prompts
|
||||
if total > MAX_MUTATIONS_PER_RUN:
|
||||
return LimitViolation(
|
||||
limit_name="mutations_per_run",
|
||||
current_value=total,
|
||||
max_value=MAX_MUTATIONS_PER_RUN,
|
||||
message=(
|
||||
f"Open Source limit: {MAX_MUTATIONS_PER_RUN} mutations per run. "
|
||||
f"You requested {total} ({requested_count} × {num_prompts} prompts).\n"
|
||||
f"Upgrade to Cloud for unlimited mutations."
|
||||
),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def check_golden_prompt_limit(num_prompts: int) -> LimitViolation | None:
|
||||
"""Check if golden prompt count exceeds limits."""
|
||||
if num_prompts > MAX_GOLDEN_PROMPTS:
|
||||
return LimitViolation(
|
||||
limit_name="golden_prompts",
|
||||
current_value=num_prompts,
|
||||
max_value=MAX_GOLDEN_PROMPTS,
|
||||
message=(
|
||||
f"Open Source limit: {MAX_GOLDEN_PROMPTS} golden prompts. "
|
||||
f"You have {num_prompts}.\n"
|
||||
f"Upgrade to Cloud for unlimited prompts."
|
||||
),
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def enforce_mutation_limit(requested_count: int, num_prompts: int) -> int:
|
||||
"""
|
||||
Enforce mutation limit by capping the count.
|
||||
|
||||
Returns the actual count to use (may be reduced).
|
||||
"""
|
||||
max_per_prompt = MAX_MUTATIONS_PER_RUN // max(num_prompts, 1)
|
||||
return min(requested_count, max(max_per_prompt, 1))
|
||||
|
||||
|
||||
def print_upgrade_banner(console: Console, reason: str = "faster execution") -> None:
|
||||
"""Print an upgrade banner to the console."""
|
||||
banner = Panel(
|
||||
Text.from_markup(
|
||||
f"[bold yellow]⚡ Want {reason}?[/bold yellow]\n\n"
|
||||
f"[white]flakestorm Cloud offers:[/white]\n"
|
||||
f" • [green]20x faster[/green] parallel execution\n"
|
||||
f" • [green]Cloud LLMs[/green] for higher quality mutations\n"
|
||||
f" • [green]Advanced safety checks[/green] (NER, ML-detection)\n"
|
||||
f" • [green]Test history[/green] and analytics dashboard\n"
|
||||
f" • [green]Team features[/green] for collaboration\n\n"
|
||||
f"[bold cyan]→ {CLOUD_URL}[/bold cyan]"
|
||||
),
|
||||
title="[bold blue]Upgrade to flakestorm Cloud[/bold blue]",
|
||||
border_style="blue",
|
||||
padding=(1, 2),
|
||||
)
|
||||
console.print(banner)
|
||||
|
||||
|
||||
def print_limit_warning(console: Console, violation: LimitViolation) -> None:
|
||||
"""Print a limit warning to the console."""
|
||||
warning = Panel(
|
||||
Text.from_markup(
|
||||
f"[bold yellow]⚠️ Limit Reached[/bold yellow]\n\n"
|
||||
f"[white]{violation.message}[/white]\n\n"
|
||||
f"[bold cyan]→ {CLOUD_URL}[/bold cyan]"
|
||||
),
|
||||
title="[bold yellow]Open Source Edition[/bold yellow]",
|
||||
border_style="yellow",
|
||||
padding=(1, 2),
|
||||
)
|
||||
console.print(warning)
|
||||
|
||||
|
||||
def print_sequential_notice(console: Console) -> None:
|
||||
"""Print a notice about sequential execution."""
|
||||
console.print(
|
||||
"\n[dim]ℹ️ Running in sequential mode (Open Source). "
|
||||
f"Upgrade to Cloud for parallel execution: {CLOUD_URL}[/dim]\n"
|
||||
)
|
||||
|
||||
|
||||
def print_completion_upsell(console: Console, duration_seconds: float) -> None:
|
||||
"""Print upsell after test completion based on duration."""
|
||||
if duration_seconds > 60: # More than 1 minute
|
||||
estimated_cloud_time = (
|
||||
duration_seconds / 20
|
||||
) # ~20x faster with parallel + cloud
|
||||
console.print(
|
||||
f"\n[dim]⏱️ Test took {duration_seconds:.1f}s. "
|
||||
f"With flakestorm Cloud, this would take ~{estimated_cloud_time:.1f}s[/dim]"
|
||||
)
|
||||
console.print(f"[dim cyan]→ {CLOUD_URL}[/dim cyan]\n")
|
||||
|
||||
|
||||
def get_feature_comparison() -> str:
|
||||
"""Get a feature comparison table for documentation."""
|
||||
return """
|
||||
## Feature Comparison
|
||||
|
||||
| Feature | Open Source | Cloud Pro | Cloud Team |
|
||||
|---------|:-----------:|:---------:|:----------:|
|
||||
| Mutation Types | 5 basic | All types | All types |
|
||||
| Mutations/Run | 50 | Unlimited | Unlimited |
|
||||
| Execution | Sequential | Parallel (20x) | Parallel (20x) |
|
||||
| LLM | Local only | Cloud + Local | Cloud + Local |
|
||||
| PII Detection | Basic | Advanced (NER) | Advanced (NER) |
|
||||
| Prompt Injection | Basic | ML-powered | ML-powered |
|
||||
| Factuality Check | ❌ | ✅ | ✅ |
|
||||
| Test History | ❌ | ✅ | ✅ |
|
||||
| Dashboard | ❌ | ✅ | ✅ |
|
||||
| GitHub Actions | ❌ | ✅ | ✅ |
|
||||
| Team Sharing | ❌ | ❌ | ✅ |
|
||||
| SSO/SAML | ❌ | ❌ | ✅ |
|
||||
| Price | Free | $49/mo | $299/mo |
|
||||
|
||||
**Why is Open Source slower?**
|
||||
- Sequential execution: Tests run one at a time
|
||||
- Local LLM: Slower than cloud GPU inference
|
||||
- No caching: Each run starts fresh
|
||||
|
||||
**Cloud advantages:**
|
||||
- 20x faster with parallel execution
|
||||
- Higher quality mutations with cloud LLMs
|
||||
- Historical comparison across runs
|
||||
"""
|
||||
|
|
@ -4,12 +4,7 @@ Orchestrator for flakestorm Test Runs
|
|||
Coordinates the entire testing process: mutation generation,
|
||||
agent invocation, invariant verification, and result aggregation.
|
||||
|
||||
Open Source Edition:
|
||||
- Sequential execution only (no parallelism)
|
||||
- Maximum 50 mutations per test run
|
||||
- Basic mutation types only
|
||||
|
||||
Upgrade to flakestorm Cloud for parallel execution and advanced features.
|
||||
Runs tests sequentially with a maximum of 50 mutations per test run.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -29,14 +24,9 @@ from rich.progress import (
|
|||
TimeRemainingColumn,
|
||||
)
|
||||
|
||||
from flakestorm.core.limits import (
|
||||
MAX_MUTATIONS_PER_RUN,
|
||||
PARALLEL_EXECUTION_ENABLED,
|
||||
check_mutation_limit,
|
||||
print_completion_upsell,
|
||||
print_limit_warning,
|
||||
print_sequential_notice,
|
||||
)
|
||||
# Hardcoded limits for open source edition
|
||||
MAX_MUTATIONS_PER_RUN = 50
|
||||
PARALLEL_EXECUTION_ENABLED = False # Sequential execution only
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from flakestorm.assertions.verifier import InvariantVerifier
|
||||
|
|
@ -116,7 +106,7 @@ class Orchestrator:
|
|||
"""
|
||||
Execute the full test run.
|
||||
|
||||
Open Source Edition runs sequentially. Upgrade to Cloud for parallel.
|
||||
Runs tests sequentially with a maximum of 50 mutations per run.
|
||||
|
||||
Returns:
|
||||
TestResults containing all test outcomes
|
||||
|
|
@ -128,26 +118,17 @@ class Orchestrator:
|
|||
self.state = OrchestratorState()
|
||||
all_results: list[MutationResult] = []
|
||||
|
||||
# Check limits and show notices
|
||||
if self.show_progress:
|
||||
print_sequential_notice(self.console)
|
||||
|
||||
# Phase 1: Generate all mutations
|
||||
all_mutations = await self._generate_mutations()
|
||||
|
||||
# Enforce mutation limit for Open Source
|
||||
# Enforce mutation limit
|
||||
if len(all_mutations) > MAX_MUTATIONS_PER_RUN:
|
||||
violation = check_mutation_limit(
|
||||
self.config.mutations.count,
|
||||
len(self.config.golden_prompts),
|
||||
)
|
||||
if violation:
|
||||
print_limit_warning(self.console, violation)
|
||||
# Truncate to limit
|
||||
all_mutations = all_mutations[:MAX_MUTATIONS_PER_RUN]
|
||||
self.console.print(
|
||||
f"[yellow]⚠️ Limited to {MAX_MUTATIONS_PER_RUN} mutations (Open Source)[/yellow]\n"
|
||||
)
|
||||
if self.show_progress:
|
||||
self.console.print(
|
||||
f"[yellow]⚠️ Limited to {MAX_MUTATIONS_PER_RUN} mutations per run[/yellow]\n"
|
||||
)
|
||||
|
||||
self.state.total_mutations = len(all_mutations)
|
||||
|
||||
|
|
@ -179,10 +160,6 @@ class Orchestrator:
|
|||
|
||||
statistics = self._calculate_statistics(all_results)
|
||||
|
||||
# Show upgrade prompt based on duration
|
||||
if self.show_progress:
|
||||
print_completion_upsell(self.console, self.state.duration_seconds)
|
||||
|
||||
return TestResults(
|
||||
config=self.config,
|
||||
started_at=self.state.started_at,
|
||||
|
|
@ -235,31 +212,15 @@ class Orchestrator:
|
|||
mutations: list[tuple[str, Mutation]],
|
||||
) -> list[MutationResult]:
|
||||
"""
|
||||
Run all mutations.
|
||||
|
||||
Open Source Edition: Sequential execution (one at a time).
|
||||
Cloud Edition: Parallel execution with configurable concurrency.
|
||||
Run all mutations sequentially (one at a time).
|
||||
"""
|
||||
# Open Source: Force sequential execution (concurrency = 1)
|
||||
concurrency = (
|
||||
1 if not PARALLEL_EXECUTION_ENABLED else self.config.advanced.concurrency
|
||||
)
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
|
||||
# Sequential execution for Open Source
|
||||
if not PARALLEL_EXECUTION_ENABLED:
|
||||
results = []
|
||||
for original, mutation in mutations:
|
||||
result = await self._run_single_mutation(original, mutation, semaphore)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
# Parallel execution (Cloud only)
|
||||
tasks = [
|
||||
self._run_single_mutation(original, mutation, semaphore)
|
||||
for original, mutation in mutations
|
||||
]
|
||||
return await asyncio.gather(*tasks)
|
||||
# Sequential execution only
|
||||
semaphore = asyncio.Semaphore(1)
|
||||
results = []
|
||||
for original, mutation in mutations:
|
||||
result = await self._run_single_mutation(original, mutation, semaphore)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
async def _run_mutations_with_progress(
|
||||
self,
|
||||
|
|
@ -268,39 +229,16 @@ class Orchestrator:
|
|||
task_id: int,
|
||||
) -> list[MutationResult]:
|
||||
"""
|
||||
Run all mutations with progress display.
|
||||
|
||||
Open Source Edition: Sequential execution.
|
||||
Run all mutations with progress display (sequential execution).
|
||||
"""
|
||||
# Open Source: Force sequential execution
|
||||
concurrency = (
|
||||
1 if not PARALLEL_EXECUTION_ENABLED else self.config.advanced.concurrency
|
||||
)
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
# Sequential execution only
|
||||
semaphore = asyncio.Semaphore(1)
|
||||
results: list[MutationResult] = []
|
||||
|
||||
# Sequential execution for Open Source
|
||||
if not PARALLEL_EXECUTION_ENABLED:
|
||||
for original, mutation in mutations:
|
||||
result = await self._run_single_mutation(original, mutation, semaphore)
|
||||
progress.update(task_id, advance=1)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
# Parallel execution (Cloud only)
|
||||
async def run_with_progress(
|
||||
original: str,
|
||||
mutation: Mutation,
|
||||
) -> MutationResult:
|
||||
for original, mutation in mutations:
|
||||
result = await self._run_single_mutation(original, mutation, semaphore)
|
||||
progress.update(task_id, advance=1)
|
||||
return result
|
||||
|
||||
tasks = [
|
||||
run_with_progress(original, mutation) for original, mutation in mutations
|
||||
]
|
||||
|
||||
results = await asyncio.gather(*tasks)
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
async def _run_single_mutation(
|
||||
|
|
|
|||
|
|
@ -1,17 +1,15 @@
|
|||
"""
|
||||
flakestorm Integrations Module
|
||||
|
||||
V2 features for integrating with external services:
|
||||
Features for integrating with external services:
|
||||
- HuggingFace model downloading
|
||||
- GitHub Actions for CI/CD
|
||||
- Local embeddings for semantic similarity
|
||||
"""
|
||||
|
||||
# V2 features - import guards for optional dependencies
|
||||
# Import guards for optional dependencies
|
||||
|
||||
__all__ = [
|
||||
"HuggingFaceModelProvider",
|
||||
"GitHubActionsIntegration",
|
||||
"LocalEmbedder",
|
||||
]
|
||||
|
||||
|
|
@ -22,10 +20,6 @@ def __getattr__(name: str):
|
|||
from flakestorm.integrations.huggingface import HuggingFaceModelProvider
|
||||
|
||||
return HuggingFaceModelProvider
|
||||
elif name == "GitHubActionsIntegration":
|
||||
from flakestorm.integrations.github_actions import GitHubActionsIntegration
|
||||
|
||||
return GitHubActionsIntegration
|
||||
elif name == "LocalEmbedder":
|
||||
from flakestorm.assertions.semantic import LocalEmbedder
|
||||
|
||||
|
|
|
|||
|
|
@ -1,255 +0,0 @@
|
|||
"""
|
||||
GitHub Actions Integration
|
||||
|
||||
⚠️ CLOUD FEATURE: GitHub Actions integration is available in flakestorm Cloud.
|
||||
The Open Source edition provides documentation only.
|
||||
|
||||
Upgrade to flakestorm Cloud for:
|
||||
- One-click CI/CD integration
|
||||
- Block PRs based on reliability score
|
||||
- Automated test history tracking
|
||||
- Team notifications
|
||||
|
||||
→ https://flakestorm.cloud
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from flakestorm.core.limits import CLOUD_URL, GITHUB_ACTIONS_ENABLED
|
||||
|
||||
|
||||
class GitHubActionsDisabledError(Exception):
|
||||
"""Raised when trying to use GitHub Actions in Open Source edition."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
"GitHub Actions integration is available in flakestorm Cloud.\n"
|
||||
f"Upgrade at: {CLOUD_URL}"
|
||||
)
|
||||
|
||||
|
||||
# GitHub Action YAML template (for reference/documentation)
|
||||
ACTION_YAML = """# ⚠️ CLOUD FEATURE: This requires flakestorm Cloud
|
||||
# Upgrade at: https://flakestorm.cloud
|
||||
|
||||
name: 'flakestorm Agent Test'
|
||||
description: 'Run chaos testing on AI agents to verify reliability'
|
||||
author: 'flakestorm'
|
||||
|
||||
branding:
|
||||
icon: 'shield'
|
||||
color: 'purple'
|
||||
|
||||
inputs:
|
||||
config:
|
||||
description: 'Path to flakestorm.yaml configuration file'
|
||||
required: false
|
||||
default: 'flakestorm.yaml'
|
||||
min_score:
|
||||
description: 'Minimum robustness score to pass (0.0-1.0)'
|
||||
required: false
|
||||
default: '0.9'
|
||||
api_key:
|
||||
description: 'flakestorm Cloud API key (required)'
|
||||
required: true
|
||||
|
||||
outputs:
|
||||
score:
|
||||
description: 'The robustness score achieved'
|
||||
passed:
|
||||
description: 'Whether the test passed (true/false)'
|
||||
report_url:
|
||||
description: 'URL to the full report on flakestorm Cloud'
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install flakestorm
|
||||
shell: bash
|
||||
run: pip install flakestorm
|
||||
|
||||
- name: Run Cloud Tests
|
||||
shell: bash
|
||||
env:
|
||||
FLAKESTORM_API_KEY: ${{ inputs.api_key }}
|
||||
run: |
|
||||
flakestorm cloud run \\
|
||||
--config ${{ inputs.config }} \\
|
||||
--min-score ${{ inputs.min_score }} \\
|
||||
--ci
|
||||
"""
|
||||
|
||||
|
||||
# Example workflow YAML
|
||||
WORKFLOW_EXAMPLE = """# flakestorm Cloud CI/CD Integration
|
||||
# ⚠️ Requires flakestorm Cloud subscription
|
||||
# Get started: https://flakestorm.cloud
|
||||
|
||||
name: Agent Reliability Check
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
reliability-test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Run flakestorm Cloud Tests
|
||||
uses: flakestorm/flakestorm-action@v1
|
||||
with:
|
||||
config: flakestorm.yaml
|
||||
min_score: '0.9'
|
||||
api_key: ${{ secrets.FLAKESTORM_API_KEY }}
|
||||
"""
|
||||
|
||||
|
||||
class GitHubActionsIntegration:
|
||||
"""
|
||||
Helper class for GitHub Actions integration.
|
||||
|
||||
⚠️ NOTE: Full CI/CD integration requires flakestorm Cloud.
|
||||
|
||||
The Open Source edition provides:
|
||||
- Documentation and examples
|
||||
- Local testing only
|
||||
|
||||
flakestorm Cloud provides:
|
||||
- One-click GitHub Actions setup
|
||||
- Block PRs based on reliability score
|
||||
- Test history and comparison
|
||||
- Slack/Discord notifications
|
||||
|
||||
Upgrade at: https://flakestorm.cloud
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _check_enabled() -> None:
|
||||
"""Check if GitHub Actions is enabled."""
|
||||
if not GITHUB_ACTIONS_ENABLED:
|
||||
raise GitHubActionsDisabledError()
|
||||
|
||||
@staticmethod
|
||||
def generate_action_yaml() -> str:
|
||||
"""
|
||||
Generate the GitHub Action definition YAML.
|
||||
|
||||
Note: This returns documentation only in Open Source edition.
|
||||
Full integration requires flakestorm Cloud.
|
||||
|
||||
Returns:
|
||||
Action YAML content
|
||||
"""
|
||||
return ACTION_YAML.strip()
|
||||
|
||||
@staticmethod
|
||||
def generate_workflow_example() -> str:
|
||||
"""
|
||||
Generate an example workflow that uses flakestorm.
|
||||
|
||||
Note: Requires flakestorm Cloud for full functionality.
|
||||
|
||||
Returns:
|
||||
Workflow YAML content
|
||||
"""
|
||||
return WORKFLOW_EXAMPLE.strip()
|
||||
|
||||
@staticmethod
|
||||
def save_action(output_dir: Path) -> Path:
|
||||
"""
|
||||
Save the GitHub Action files to a directory.
|
||||
|
||||
⚠️ Cloud Feature: This creates documentation only.
|
||||
For working CI/CD, upgrade to flakestorm Cloud.
|
||||
|
||||
Args:
|
||||
output_dir: Directory to save action files
|
||||
|
||||
Returns:
|
||||
Path to the action.yml file
|
||||
"""
|
||||
output_dir = Path(output_dir)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
action_path = output_dir / "action.yml"
|
||||
action_path.write_text(ACTION_YAML.strip(), encoding="utf-8")
|
||||
|
||||
# Also create a README explaining Cloud requirement
|
||||
readme_path = output_dir / "README.md"
|
||||
readme_path.write_text(
|
||||
f"""# flakestorm GitHub Action
|
||||
|
||||
⚠️ **Cloud Feature**: Full CI/CD integration requires flakestorm Cloud.
|
||||
|
||||
## What You Get with Cloud
|
||||
|
||||
- ✅ One-click GitHub Actions setup
|
||||
- ✅ Block PRs based on reliability score
|
||||
- ✅ Test history and comparison across runs
|
||||
- ✅ Slack/Discord notifications
|
||||
- ✅ 20x faster parallel execution
|
||||
|
||||
## Upgrade
|
||||
|
||||
Get started at: {CLOUD_URL}
|
||||
|
||||
## Local Testing
|
||||
|
||||
For local-only testing, use the Open Source CLI:
|
||||
|
||||
```bash
|
||||
flakestorm run --config flakestorm.yaml
|
||||
```
|
||||
|
||||
Note: Local runs are sequential and may be slow for large test suites.
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
return action_path
|
||||
|
||||
@staticmethod
|
||||
def save_workflow_example(output_path: Path) -> Path:
|
||||
"""
|
||||
Save an example workflow file.
|
||||
|
||||
Args:
|
||||
output_path: Path to save the workflow file
|
||||
|
||||
Returns:
|
||||
Path to the saved file
|
||||
"""
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_path.write_text(WORKFLOW_EXAMPLE.strip(), encoding="utf-8")
|
||||
|
||||
return output_path
|
||||
|
||||
@staticmethod
|
||||
def setup_ci(
|
||||
repo_path: Path,
|
||||
config_path: str = "flakestorm.yaml",
|
||||
min_score: float = 0.9,
|
||||
) -> None:
|
||||
"""
|
||||
Set up CI/CD integration for a repository.
|
||||
|
||||
⚠️ Cloud Feature: Requires flakestorm Cloud subscription.
|
||||
|
||||
Raises:
|
||||
GitHubActionsDisabledError: Always in Open Source edition
|
||||
"""
|
||||
GitHubActionsIntegration._check_enabled()
|
||||
# Cloud implementation would go here
|
||||
|
|
@ -154,7 +154,7 @@ class HuggingFaceModelProvider:
|
|||
>>> model_name = provider.import_to_ollama(path, "mistral-attacker")
|
||||
>>> # Now use with: ollama run mistral-attacker
|
||||
"""
|
||||
import subprocess
|
||||
import subprocess # nosec B404
|
||||
import tempfile
|
||||
|
||||
model_path = Path(model_path)
|
||||
|
|
@ -196,7 +196,7 @@ SYSTEM You are a helpful assistant that generates text variations.
|
|||
|
||||
try:
|
||||
# Run ollama create command
|
||||
result = subprocess.run(
|
||||
result = subprocess.run( # nosec B603, B607
|
||||
["ollama", "create", model_name, "-f", modelfile_path],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
|
|
@ -270,7 +270,7 @@ SYSTEM You are a helpful assistant that generates text variations.
|
|||
|
||||
try:
|
||||
req = urllib.request.Request(f"{host}/api/version")
|
||||
with urllib.request.urlopen(req, timeout=5) as response:
|
||||
with urllib.request.urlopen(req, timeout=5) as response: # nosec B310
|
||||
return response.status == 200
|
||||
except (urllib.error.URLError, TimeoutError):
|
||||
return False
|
||||
|
|
@ -297,7 +297,7 @@ SYSTEM You are a helpful assistant that generates text variations.
|
|||
|
||||
try:
|
||||
req = urllib.request.Request(f"{host}/api/tags")
|
||||
with urllib.request.urlopen(req, timeout=10) as response:
|
||||
with urllib.request.urlopen(req, timeout=10) as response: # nosec B310
|
||||
data = json.loads(response.read().decode())
|
||||
return [model["name"] for model in data.get("models", [])]
|
||||
except (urllib.error.URLError, TimeoutError, json.JSONDecodeError):
|
||||
|
|
|
|||
|
|
@ -16,15 +16,12 @@ class MutationType(str, Enum):
|
|||
"""
|
||||
Types of adversarial mutations.
|
||||
|
||||
Open Source Edition includes 5 mutation types:
|
||||
Includes 5 mutation types:
|
||||
- PARAPHRASE: Semantic rewrites
|
||||
- NOISE: Typos and spelling errors
|
||||
- TONE_SHIFT: Tone changes
|
||||
- PROMPT_INJECTION: Basic adversarial attacks
|
||||
- CUSTOM: User-defined mutation templates
|
||||
|
||||
Advanced mutations (sophisticated prompt injections, jailbreaks)
|
||||
are available in flakestorm Cloud.
|
||||
"""
|
||||
|
||||
PARAPHRASE = "paraphrase"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue