From 87dd364f0f15dee30f91f70a0f7fd4e8801d5372 Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Tue, 2 Dec 2025 14:49:46 -0800 Subject: [PATCH 1/9] feat(langraph+lsd): custom encryption at rest Signed-off-by: Connor Braa --- src/docs.json | 1 + src/langsmith/custom-encryption.mdx | 407 ++++++++++++++++++++++++++++ 2 files changed, 408 insertions(+) create mode 100644 src/langsmith/custom-encryption.mdx diff --git a/src/docs.json b/src/docs.json index a1effbaa4e..6dc0272d4d 100644 --- a/src/docs.json +++ b/src/docs.json @@ -1310,6 +1310,7 @@ "langsmith/custom-lifespan", "langsmith/custom-middleware", "langsmith/custom-routes", + "langsmith/custom-encryption", "langsmith/configurable-headers", "langsmith/configurable-logs" ] diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/custom-encryption.mdx new file mode 100644 index 0000000000..e17e19331b --- /dev/null +++ b/src/langsmith/custom-encryption.mdx @@ -0,0 +1,407 @@ +--- +title: Add custom at-rest encryption +sidebarTitle: Custom encryption +--- + +This guide shows how to add custom at-rest encryption to your LangGraph Platform application for self-hosted deployments. This allows you to encrypt sensitive data before it's stored in the database, using your own encryption keys and services. + + +Custom at-rest encryption is supported for **self-hosted** LangGraph Platform deployments only (Python graphs only). + + + +Custom encryption is currently only supported for Python graphs. JavaScript/TypeScript support is not yet available. 
+ + +## Overview + +Custom at-rest encryption allows you to: + +- **Encrypt data at rest** - Selectively encrypt metadata, values, checkpoint blobs, and other data on assistants, threads, runs, and crons +- **Use your own encryption service** - Integrate with AWS KMS, Google Cloud KMS, HashiCorp Vault, or any other encryption service +- **Per-entity key isolation** - Use different encryption keys per tenant, user, or other entity + +The encryption system provides a decorator-based API similar to the Auth system, allowing you to define custom encryption and decryption handlers that are executed server-side. + +## How it works + +1. **Define handlers** - Create encryption and decryption functions decorated with `@encryption.encrypt.blob`, `@encryption.decrypt.blob`, `@encryption.encrypt.json`, and `@encryption.decrypt.json` +2. **Configure** - Add the path to your encryption module in `langgraph.json` +3. **Pass context** - Send encryption context (like tenant ID and key ID) via the `X-Encryption-Context` header +4. **Automatic encryption** - LangGraph automatically encrypts data before storing and decrypts on retrieval + +## Add custom encryption to your deployment + +### 1. Create an encryption module + +Create a Python file (e.g., `encrypt.py`) with your encryption handlers. You can implement **blob handlers**, **JSON handlers**, or both depending on your needs: + +- **Blob handlers** encrypt checkpoint data (opaque binary state from graph execution) +- **JSON handlers** encrypt structured metadata on threads, assistants, runs, and crons + +```python +import base64 + +import boto3 + +from langgraph_sdk import Encryption, EncryptionContext + +encryption = Encryption() +kms_client = boto3.client('kms') +``` + +#### Blob encryption (checkpoint data) + +Use blob handlers to encrypt checkpoint state. 
This is useful when you need to protect the full execution state of your graphs: + +```python +@encryption.encrypt.blob +async def encrypt_checkpoint(ctx: EncryptionContext, blob: bytes) -> bytes: + """Encrypt checkpoint blob data.""" + tenant_id = ctx.metadata.get("tenant_id", "default") + key_id = ctx.metadata.get("key_id") + response = kms_client.encrypt( + KeyId=key_id, + Plaintext=blob, + EncryptionContext={'tenant_id': tenant_id} + ) + return response['CiphertextBlob'] + + +@encryption.decrypt.blob +async def decrypt_checkpoint(ctx: EncryptionContext, blob: bytes) -> bytes: + """Decrypt checkpoint blob data.""" + tenant_id = ctx.metadata.get("tenant_id", "default") + response = kms_client.decrypt( + CiphertextBlob=blob, + EncryptionContext={'tenant_id': tenant_id} + ) + return response['Plaintext'] +``` + +#### JSON encryption (metadata fields) + +Use JSON handlers to selectively encrypt metadata fields. Unlike blob encryption, JSON encryption lets you choose which fields to encrypt—keeping some fields unencrypted for search and filtering while protecting sensitive data like conversation history: + +```python +# Encrypt by field name or prefix +SENSITIVE_FIELDS = {"messages"} +SENSITIVE_PREFIX = "my.customer.org/" + + +@encryption.encrypt.json +async def encrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: + """Encrypt PII fields, leave others searchable.""" + tenant_id = ctx.metadata.get("tenant_id", "default") + key_id = ctx.metadata.get("key_id") + + encrypted_data = {} + for key, value in data.items(): + if key in SENSITIVE_FIELDS or key.startswith(SENSITIVE_PREFIX): + # Encrypt PII (messages, emails, phone numbers, etc.) 
+ response = kms_client.encrypt( + KeyId=key_id, + Plaintext=str(value).encode(), + EncryptionContext={'tenant_id': tenant_id} + ) + encrypted_data[key] = base64.b64encode( + response['CiphertextBlob'] + ).decode() + else: + # Keep fields like "owner" or "status" unencrypted for search/filtering + encrypted_data[key] = value + + return encrypted_data + + +@encryption.decrypt.json +async def decrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: + """Decrypt PII fields.""" + tenant_id = ctx.metadata.get("tenant_id", "default") + + decrypted_data = {} + for key, value in data.items(): + if (key in SENSITIVE_FIELDS or key.startswith(SENSITIVE_PREFIX)) and isinstance(value, str): + ciphertext = base64.b64decode(value.encode()) + response = kms_client.decrypt( + CiphertextBlob=ciphertext, + EncryptionContext={'tenant_id': tenant_id} + ) + decrypted_data[key] = response['Plaintext'].decode() + else: + decrypted_data[key] = value + + return decrypted_data +``` + + +When data is encrypted, the `ctx.metadata` (from the `X-Encryption-Context` header) is stored alongside it. On decryption, this context is automatically restored—callers don't need to pass the header again when reading data. + + +### 2. Configure in langgraph.json + +Add the path to your encryption module in your `langgraph.json`: + +```json {7-9} +{ + "dependencies": ["."], + "graphs": { + "agent": "./agent.py:graph" + }, + "env": ".env", + "encryption": { + "path": "./encryption.py:encryption" + } +} +``` + +### 3. 
Pass encryption context in requests + +When making requests to your deployment, include the `X-Encryption-Context` header with a base64-encoded JSON object containing your encryption parameters: + + + +```python +import base64 +import json +from langgraph_sdk import get_client + +# Define encryption context +encryption_context = { + "tenant_id": "customer-123", + "key_id": "arn:aws:kms:us-east-1:123456789:key/abc-def-123" +} + +# Encode as base64 +encoded_context = base64.b64encode( + json.dumps(encryption_context).encode() +).decode() + +# Create client with encryption context header +client = get_client( + url="http://localhost:2024", + headers={"X-Encryption-Context": encoded_context} +) + +# Create a thread with encrypted metadata +thread = await client.threads.create( + metadata={ + "owner": "user-456", # Unencrypted (for search) + "my.customer.org/email": "john@example.com", # Encrypted + "my.customer.org/phone": "+1-555-0123", # Encrypted + } +) +``` + + +```python +import base64 +import json +from langgraph.pregel.remote import RemoteGraph + +# Define encryption context +encryption_context = { + "tenant_id": "customer-123", + "key_id": "arn:aws:kms:us-east-1:123456789:key/abc-def-123" +} + +# Encode as base64 +encoded_context = base64.b64encode( + json.dumps(encryption_context).encode() +).decode() + +# Create remote graph with encryption context +remote_graph = RemoteGraph( + "agent", + url="http://localhost:2024", + headers={"X-Encryption-Context": encoded_context} +) + +# The encryption context is automatically used for all operations +result = await remote_graph.ainvoke( + {"messages": [{"role": "user", "content": "Hello"}]}, + config={"configurable": {"thread_id": "thread-1"}} +) +``` + + +```bash +# Encode encryption context +ENCRYPTION_CONTEXT=$(echo -n '{"tenant_id":"customer-123","key_id":"arn:aws:kms:us-east-1:123456789:key/abc-def-123"}' | base64) + +# Create thread with encrypted metadata +curl -X POST http://localhost:2024/threads \ + -H 
"Content-Type: application/json" \ + -H "X-Encryption-Context: $ENCRYPTION_CONTEXT" \ + -d '{ + "metadata": { + "owner": "user-456", + "my.customer.org/email": "john@example.com", + "my.customer.org/phone": "+1-555-0123" + } + }' +``` + + + +### 4. (Optional) Derive context from authentication + +Instead of passing the `X-Encryption-Context` header, you can derive encryption context from the authenticated user using `@encryption.context`: + +```python +@encryption.context +async def get_encryption_context(user: BaseUser, ctx: EncryptionContext) -> dict: + """Called once per request after auth. Returns the new ctx.metadata.""" + return { + **ctx.metadata, # Preserve any header context + "tenant_id": getattr(user, "tenant_id", "default"), + "key_id": getattr(user, "key_id", None), + } +``` + +The handler receives the authenticated user and can extract tenant info from user attributes or JWT claims. This eliminates the need for clients to pass encryption context separately. + +## Important considerations + +### Searchability + +Fields with encrypted **values** cannot be reliably searched when using non-deterministic encryption (which most real-world encryption provides). 
In the example above: + +- ✅ **Can search**: `owner` field (unencrypted) +- ❌ **Cannot search**: `my.customer.org/email` field (encrypted value) + +Design your metadata schema carefully: +- Put searchable/filterable fields in unencrypted metadata +- Put sensitive data that doesn't need to be searched in encrypted fields +- Consider using prefixes to denote which fields should be encrypted + +### What gets encrypted + +The encryption handlers are called for: + +**JSON encryption** (`@encryption.encrypt.json` / `@encryption.decrypt.json`): +- `thread.metadata` +- `thread.values` +- `assistant.metadata` +- `assistant.context` +- `run.metadata` +- `run.kwargs` +- `cron.metadata` +- `cron.payload` + +**Blob encryption** (`@encryption.encrypt.blob` / `@encryption.decrypt.blob`): +- Checkpoint blobs (complex state data) + +### Model-specific handlers + +You can register different encryption handlers for different model types using `@encryption.encrypt.json.thread`, `@encryption.encrypt.json.assistant`, etc.: + +```python +from langgraph_sdk import Encryption, EncryptionContext + +encryption = Encryption() + +# Default handler for models without specific handlers +@encryption.encrypt.json +async def default_encrypt(ctx: EncryptionContext, data: dict) -> dict: + return standard_encrypt(data) + +# Thread-specific handler (uses different KMS key) +@encryption.encrypt.json.thread +async def encrypt_thread(ctx: EncryptionContext, data: dict) -> dict: + return encrypt_with_thread_key(data) + +# Assistant-specific handler +@encryption.encrypt.json.assistant +async def encrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: + return encrypt_with_assistant_key(data) + +# Same pattern for decryption +@encryption.decrypt.json +async def default_decrypt(ctx: EncryptionContext, data: dict) -> dict: + return standard_decrypt(data) + +@encryption.decrypt.json.thread +async def decrypt_thread(ctx: EncryptionContext, data: dict) -> dict: + return decrypt_with_thread_key(data) +``` + 
+### Security best practices + +1. **Never hardcode keys** - Use environment variables or secret managers +2. **Use KMS services** - Don't implement your own encryption algorithms +3. **Audit logging** - Log encryption/decryption operations for compliance +4. **Key rotation** - Plan for periodic key rotation +5. **Access control** - Restrict access to encryption keys using IAM policies + +## Example: Multi-tenant encryption + +Here's a complete example showing multi-tenant encryption where each customer has their own encryption key: + +```python +import base64 +import os + +import boto3 + +from langgraph_sdk import Encryption, EncryptionContext + +encryption = Encryption() +kms_client = boto3.client('kms', region_name=os.getenv('AWS_REGION')) + +# Map of tenant IDs to KMS key ARNs (in production, fetch from a database) +TENANT_KEYS = { + "customer-123": "arn:aws:kms:us-east-1:123456789:key/abc-123", + "customer-456": "arn:aws:kms:us-east-1:123456789:key/def-456", +} + +@encryption.encrypt.json +async def encrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: + tenant_id = ctx.metadata.get("tenant_id") + if not tenant_id: + raise ValueError("tenant_id is required in encryption context") + + key_id = TENANT_KEYS.get(tenant_id) + if not key_id: + raise ValueError(f"No encryption key found for tenant: {tenant_id}") + + encrypted_data = {} + for key, value in data.items(): + if key.startswith("my.customer.org/"): + response = kms_client.encrypt( + KeyId=key_id, + Plaintext=str(value).encode(), + EncryptionContext={'tenant_id': tenant_id, 'field': key} + ) + encrypted_data[key] = base64.b64encode( + response['CiphertextBlob'] + ).decode() + else: + encrypted_data[key] = value + + return encrypted_data + +@encryption.decrypt.json +async def decrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: + tenant_id = ctx.metadata.get("tenant_id") + if not tenant_id: + raise ValueError("tenant_id is required in encryption context") + + decrypted_data = {} + for key, 
value in data.items(): + if key.startswith("my.customer.org/") and isinstance(value, str): + ciphertext = base64.b64decode(value.encode()) + response = kms_client.decrypt( + CiphertextBlob=ciphertext, + EncryptionContext={'tenant_id': tenant_id, 'field': key} + ) + decrypted_data[key] = response['Plaintext'].decode() + else: + decrypted_data[key] = value + + return decrypted_data +``` + +## Related resources + +- [Add custom authentication](/langsmith/custom-auth) - Similar decorator-based system for custom auth From 096e36ab80e94b259e6b8acf2e95b95d598c0ad9 Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Wed, 3 Dec 2025 12:10:34 -0800 Subject: [PATCH 2/9] full rework of examples Signed-off-by: Connor Braa --- src/langsmith/custom-encryption.mdx | 510 ++++++++++++---------------- 1 file changed, 225 insertions(+), 285 deletions(-) diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/custom-encryption.mdx index e17e19331b..e871581d8d 100644 --- a/src/langsmith/custom-encryption.mdx +++ b/src/langsmith/custom-encryption.mdx @@ -3,405 +3,345 @@ title: Add custom at-rest encryption sidebarTitle: Custom encryption --- -This guide shows how to add custom at-rest encryption to your LangGraph Platform application for self-hosted deployments. This allows you to encrypt sensitive data before it's stored in the database, using your own encryption keys and services. +Custom encryption lets you control how sensitive data is encrypted before it's stored in the database. -Custom at-rest encryption is supported for **self-hosted** LangGraph Platform deployments only (Python graphs only). +Custom encryption is available for **self-hosted** LangGraph Platform deployments only (Python graphs only). - -Custom encryption is currently only supported for Python graphs. JavaScript/TypeScript support is not yet available. - +## When to use custom encryption -## Overview +For basic encryption with a single static key, set the `LANGGRAPH_AES_KEY` environment variable. 
LangGraph will automatically encrypt checkpoint data using AES. -Custom at-rest encryption allows you to: +Use **custom encryption** when you need: -- **Encrypt data at rest** - Selectively encrypt metadata, values, checkpoint blobs, and other data on assistants, threads, runs, and crons -- **Use your own encryption service** - Integrate with AWS KMS, Google Cloud KMS, HashiCorp Vault, or any other encryption service -- **Per-entity key isolation** - Use different encryption keys per tenant, user, or other entity - -The encryption system provides a decorator-based API similar to the Auth system, allowing you to define custom encryption and decryption handlers that are executed server-side. +- **Per-tenant key isolation** — different encryption keys for different customers +- **KMS integration** — AWS KMS, Google Cloud KMS, or HashiCorp Vault for key management, rotation, and audit logging +- **Selective field encryption** — encrypt sensitive metadata fields while keeping others searchable ## How it works -1. **Define handlers** - Create encryption and decryption functions decorated with `@encryption.encrypt.blob`, `@encryption.decrypt.blob`, `@encryption.encrypt.json`, and `@encryption.decrypt.json` -2. **Configure** - Add the path to your encryption module in `langgraph.json` -3. **Pass context** - Send encryption context (like tenant ID and key ID) via the `X-Encryption-Context` header -4. **Automatic encryption** - LangGraph automatically encrypts data before storing and decrypts on retrieval - -## Add custom encryption to your deployment +1. Create an encryption module with handlers decorated with `@encryption.encrypt.blob`, `@encryption.decrypt.blob`, `@encryption.encrypt.json`, and `@encryption.decrypt.json` +2. Add the module path to `langgraph.json` +3. Pass encryption context (like tenant ID) via the `X-Encryption-Context` header +4. LangGraph calls your handlers before storing and after retrieving data -### 1. 
Create an encryption module +## Configuration -Create a Python file (e.g., `encrypt.py`) with your encryption handlers. You can implement **blob handlers**, **JSON handlers**, or both depending on your needs: +Add your encryption module to `langgraph.json`: -- **Blob handlers** encrypt checkpoint data (opaque binary state from graph execution) -- **JSON handlers** encrypt structured metadata on threads, assistants, runs, and crons +```json +{ + "dependencies": ["."], + "graphs": { + "agent": "./agent.py:graph" + }, + "encryption": { + "path": "./encryption.py:encryption" + } +} +``` -```python -import base64 +## Blob encryption (checkpoints) -import boto3 +Blob handlers encrypt checkpoint data—the serialized state from graph execution. Here's a simplified example using per-tenant keys: +```python +import os +from cryptography.fernet import Fernet from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() -kms_client = boto3.client('kms') -``` -#### Blob encryption (checkpoint data) +# In production, fetch from a secrets manager +TENANT_KEYS = { + "tenant-a": Fernet(os.environ["TENANT_A_KEY"]), + "tenant-b": Fernet(os.environ["TENANT_B_KEY"]), +} + + +def _get_fernet(ctx: EncryptionContext) -> Fernet: + tenant_id = ctx.metadata.get("tenant_id") + if not tenant_id or tenant_id not in TENANT_KEYS: + raise ValueError(f"Unknown tenant: {tenant_id}") + return TENANT_KEYS[tenant_id] -Use blob handlers to encrypt checkpoint state. 
This is useful when you need to protect the full execution state of your graphs: -```python @encryption.encrypt.blob -async def encrypt_checkpoint(ctx: EncryptionContext, blob: bytes) -> bytes: - """Encrypt checkpoint blob data.""" - tenant_id = ctx.metadata.get("tenant_id", "default") - key_id = ctx.metadata.get("key_id") - response = kms_client.encrypt( - KeyId=key_id, - Plaintext=blob, - EncryptionContext={'tenant_id': tenant_id} - ) - return response['CiphertextBlob'] +async def encrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: + return _get_fernet(ctx).encrypt(data) @encryption.decrypt.blob -async def decrypt_checkpoint(ctx: EncryptionContext, blob: bytes) -> bytes: - """Decrypt checkpoint blob data.""" - tenant_id = ctx.metadata.get("tenant_id", "default") - response = kms_client.decrypt( - CiphertextBlob=blob, - EncryptionContext={'tenant_id': tenant_id} - ) - return response['Plaintext'] +async def decrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: + return _get_fernet(ctx).decrypt(data) ``` -#### JSON encryption (metadata fields) +The `ctx.metadata` dict comes from the `X-Encryption-Context` header and is stored alongside encrypted data, so the correct key is used on decryption. + +For production deployments with key rotation and audit logging, see [Envelope encryption with AWS Encryption SDK](#envelope-encryption-with-aws-encryption-sdk). + +## JSON encryption (metadata) + +JSON handlers encrypt structured data like thread metadata, assistant context, and run kwargs. Unlike blob encryption, you choose which fields to encrypt—keeping some unencrypted for search and filtering. + + +**Encrypted fields cannot be searched or filtered.** Design your metadata schema so that fields you need to query remain unencrypted.
+ + +Common fields to leave **unencrypted** for search and filtering: + +- `user_id`, `tenant_id`, `project_id` — for access control queries +- `status`, `priority`, `type` — for filtering by state +- `tags`, `labels` — for categorization queries +- `created_by`, `owner` — for ownership lookups -Use JSON handlers to selectively encrypt metadata fields. Unlike blob encryption, JSON encryption lets you choose which fields to encrypt—keeping some fields unencrypted for search and filtering while protecting sensitive data like conversation history: +Encrypt fields containing **sensitive data**: + +- Conversation messages and content +- Personal information (email, phone, address) +- API keys, tokens, credentials +- Business-sensitive data ```python -# Encrypt by field name or prefix -SENSITIVE_FIELDS = {"messages"} -SENSITIVE_PREFIX = "my.customer.org/" +import os +from cryptography.fernet import Fernet +from langgraph_sdk import Encryption, EncryptionContext +encryption = Encryption() -@encryption.encrypt.json -async def encrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: - """Encrypt PII fields, leave others searchable.""" - tenant_id = ctx.metadata.get("tenant_id", "default") - key_id = ctx.metadata.get("key_id") +TENANT_KEYS = { + "tenant-a": Fernet(os.environ["TENANT_A_KEY"]), + "tenant-b": Fernet(os.environ["TENANT_B_KEY"]), +} - encrypted_data = {} - for key, value in data.items(): - if key in SENSITIVE_FIELDS or key.startswith(SENSITIVE_PREFIX): - # Encrypt PII (messages, emails, phone numbers, etc.) 
- response = kms_client.encrypt( - KeyId=key_id, - Plaintext=str(value).encode(), - EncryptionContext={'tenant_id': tenant_id} - ) - encrypted_data[key] = base64.b64encode( - response['CiphertextBlob'] - ).decode() - else: - # Keep fields like "owner" or "status" unencrypted for search/filtering - encrypted_data[key] = value +SENSITIVE_FIELDS = {"messages", "email", "phone", "content"} - return encrypted_data +def _get_fernet(ctx: EncryptionContext) -> Fernet: + tenant_id = ctx.metadata.get("tenant_id") + if not tenant_id or tenant_id not in TENANT_KEYS: + raise ValueError(f"Unknown tenant: {tenant_id}") + return TENANT_KEYS[tenant_id] -@encryption.decrypt.json -async def decrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: - """Decrypt PII fields.""" - tenant_id = ctx.metadata.get("tenant_id", "default") - decrypted_data = {} +@encryption.encrypt.json +async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: + fernet = _get_fernet(ctx) + result = {} for key, value in data.items(): - if (key in SENSITIVE_FIELDS or key.startswith(SENSITIVE_PREFIX)) and isinstance(value, str): - ciphertext = base64.b64decode(value.encode()) - response = kms_client.decrypt( - CiphertextBlob=ciphertext, - EncryptionContext={'tenant_id': tenant_id} - ) - decrypted_data[key] = response['Plaintext'].decode() + if key in SENSITIVE_FIELDS and value is not None: + result[key] = fernet.encrypt(str(value).encode()).decode() else: - decrypted_data[key] = value + result[key] = value + return result - return decrypted_data + +@encryption.decrypt.json +async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: + fernet = _get_fernet(ctx) + result = {} + for key, value in data.items(): + if key in SENSITIVE_FIELDS and isinstance(value, str): + try: + result[key] = fernet.decrypt(value.encode()).decode() + except Exception: + result[key] = value + else: + result[key] = value + return result ``` - -When data is encrypted, the `ctx.metadata` (from the 
`X-Encryption-Context` header) is stored alongside it. On decryption, this context is automatically restored—callers don't need to pass the header again when reading data. - +### What gets encrypted -### 2. Configure in langgraph.json +**JSON handlers** (`@encryption.encrypt.json` / `@encryption.decrypt.json`): -Add the path to your encryption module in your `langgraph.json`: +- `thread.metadata`, `thread.values` +- `assistant.metadata`, `assistant.context` +- `run.metadata`, `run.kwargs` +- `cron.metadata`, `cron.payload` -```json {7-9} -{ - "dependencies": ["."], - "graphs": { - "agent": "./agent.py:graph" - }, - "env": ".env", - "encryption": { - "path": "./encryption.py:encryption" - } -} -``` +**Blob handlers** (`@encryption.encrypt.blob` / `@encryption.decrypt.blob`): -### 3. Pass encryption context in requests +- Checkpoint blobs (graph execution state) -When making requests to your deployment, include the `X-Encryption-Context` header with a base64-encoded JSON object containing your encryption parameters: +## Passing encryption context + +Pass encryption context via the `X-Encryption-Context` header. The context is available in your handlers as `ctx.metadata` and is stored alongside encrypted data for use during decryption. 
- - ```python import base64 import json from langgraph_sdk import get_client -# Define encryption context -encryption_context = { - "tenant_id": "customer-123", - "key_id": "arn:aws:kms:us-east-1:123456789:key/abc-def-123" -} - -# Encode as base64 -encoded_context = base64.b64encode( - json.dumps(encryption_context).encode() +encryption_context = base64.b64encode( + json.dumps({"tenant_id": "tenant-a"}).encode() ).decode() -# Create client with encryption context header -client = get_client( - url="http://localhost:2024", - headers={"X-Encryption-Context": encoded_context} -) +client = get_client(url="http://localhost:2024") -# Create a thread with encrypted metadata -thread = await client.threads.create( - metadata={ - "owner": "user-456", # Unencrypted (for search) - "my.customer.org/email": "john@example.com", # Encrypted - "my.customer.org/phone": "+1-555-0123", # Encrypted - } +result = await client.runs.wait( + thread_id=None, + assistant_id="agent", + input={"messages": [{"role": "user", "content": "Hello"}]}, + headers={"X-Encryption-Context": encryption_context}, ) ``` - - -```python -import base64 -import json -from langgraph.pregel.remote import RemoteGraph -# Define encryption context -encryption_context = { - "tenant_id": "customer-123", - "key_id": "arn:aws:kms:us-east-1:123456789:key/abc-def-123" -} - -# Encode as base64 -encoded_context = base64.b64encode( - json.dumps(encryption_context).encode() -).decode() + +The encryption context is stored with encrypted data. On decryption, it's automatically restored—callers don't need to pass the header when reading. 
+ -# Create remote graph with encryption context -remote_graph = RemoteGraph( - "agent", - url="http://localhost:2024", - headers={"X-Encryption-Context": encoded_context} -) +## Deriving context from authentication -# The encryption context is automatically used for all operations -result = await remote_graph.ainvoke( - {"messages": [{"role": "user", "content": "Hello"}]}, - config={"configurable": {"thread_id": "thread-1"}} -) -``` - - -```bash -# Encode encryption context -ENCRYPTION_CONTEXT=$(echo -n '{"tenant_id":"customer-123","key_id":"arn:aws:kms:us-east-1:123456789:key/abc-def-123"}' | base64) - -# Create thread with encrypted metadata -curl -X POST http://localhost:2024/threads \ - -H "Content-Type: application/json" \ - -H "X-Encryption-Context: $ENCRYPTION_CONTEXT" \ - -d '{ - "metadata": { - "owner": "user-456", - "my.customer.org/email": "john@example.com", - "my.customer.org/phone": "+1-555-0123" - } - }' -``` - - +Instead of passing `X-Encryption-Context` explicitly, derive encryption context from the authenticated user: -### 4. (Optional) Derive context from authentication +```python +from langgraph_sdk import Encryption, EncryptionContext +from starlette.authentication import BaseUser -Instead of passing the `X-Encryption-Context` header, you can derive encryption context from the authenticated user using `@encryption.context`: +encryption = Encryption() -```python @encryption.context async def get_encryption_context(user: BaseUser, ctx: EncryptionContext) -> dict: - """Called once per request after auth. Returns the new ctx.metadata.""" return { - **ctx.metadata, # Preserve any header context + **ctx.metadata, "tenant_id": getattr(user, "tenant_id", "default"), - "key_id": getattr(user, "key_id", None), } ``` -The handler receives the authenticated user and can extract tenant info from user attributes or JWT claims. This eliminates the need for clients to pass encryption context separately. 
- -## Important considerations - -### Searchability +This handler runs once per request after authentication. The returned dict becomes `ctx.metadata` for all encryption operations in that request. -Fields with encrypted **values** cannot be reliably searched when using non-deterministic encryption (which most real-world encryption provides). In the example above: +## Model-specific handlers -- ✅ **Can search**: `owner` field (unencrypted) -- ❌ **Cannot search**: `my.customer.org/email` field (encrypted value) - -Design your metadata schema carefully: -- Put searchable/filterable fields in unencrypted metadata -- Put sensitive data that doesn't need to be searched in encrypted fields -- Consider using prefixes to denote which fields should be encrypted - -### What gets encrypted - -The encryption handlers are called for: - -**JSON encryption** (`@encryption.encrypt.json` / `@encryption.decrypt.json`): -- `thread.metadata` -- `thread.values` -- `assistant.metadata` -- `assistant.context` -- `run.metadata` -- `run.kwargs` -- `cron.metadata` -- `cron.payload` - -**Blob encryption** (`@encryption.encrypt.blob` / `@encryption.decrypt.blob`): -- Checkpoint blobs (complex state data) - -### Model-specific handlers - -You can register different encryption handlers for different model types using `@encryption.encrypt.json.thread`, `@encryption.encrypt.json.assistant`, etc.: +Register different handlers for different model types using `@encryption.encrypt.json.assistant`, `@encryption.encrypt.json.run`, etc. 
Use `ctx.field` to vary behavior by field: ```python from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() -# Default handler for models without specific handlers @encryption.encrypt.json async def default_encrypt(ctx: EncryptionContext, data: dict) -> dict: - return standard_encrypt(data) - -# Thread-specific handler (uses different KMS key) -@encryption.encrypt.json.thread -async def encrypt_thread(ctx: EncryptionContext, data: dict) -> dict: - return encrypt_with_thread_key(data) + return encrypt_sensitive_fields(data) -# Assistant-specific handler @encryption.encrypt.json.assistant async def encrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: - return encrypt_with_assistant_key(data) + if ctx.field == "context": + # Assistant context may contain API keys or system prompts + return encrypt_all_fields(data) + # Assistant metadata—selective encryption + return encrypt_sensitive_fields(data) -# Same pattern for decryption @encryption.decrypt.json async def default_decrypt(ctx: EncryptionContext, data: dict) -> dict: - return standard_decrypt(data) + return decrypt_sensitive_fields(data) -@encryption.decrypt.json.thread -async def decrypt_thread(ctx: EncryptionContext, data: dict) -> dict: - return decrypt_with_thread_key(data) +@encryption.decrypt.json.assistant +async def decrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: + if ctx.field == "context": + return decrypt_all_fields(data) + return decrypt_sensitive_fields(data) ``` -### Security best practices +Supported model types: `thread`, `assistant`, `run`, `cron`, `checkpoint`. -1. **Never hardcode keys** - Use environment variables or secret managers -2. **Use KMS services** - Don't implement your own encryption algorithms -3. **Audit logging** - Log encryption/decryption operations for compliance -4. **Key rotation** - Plan for periodic key rotation -5. 
**Access control** - Restrict access to encryption keys using IAM policies +## Envelope encryption with AWS Encryption SDK -## Example: Multi-tenant encryption +For production deployments on AWS, use the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/python.html) with AWS KMS, or an equivalent within your cloud provider. This approach: -Here's a complete example showing multi-tenant encryption where each customer has their own encryption key: +- Handles envelope encryption automatically (no manual key packing) +- Provides key rotation and audit logging +- Binds ciphertext to encryption context (tenant isolation) + +### Complete example ```python import base64 import os -import boto3 - +import aws_encryption_sdk +from aws_encryption_sdk import ( + CachingCryptoMaterialsManager, + CommitmentPolicy, + LocalCryptoMaterialsCache, + StrictAwsKmsMasterKeyProvider, +) from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() -kms_client = boto3.client('kms', region_name=os.getenv('AWS_REGION')) -# Map of tenant IDs to KMS key ARNs (in production, fetch from a database) -TENANT_KEYS = { - "customer-123": "arn:aws:kms:us-east-1:123456789:key/abc-123", - "customer-456": "arn:aws:kms:us-east-1:123456789:key/def-456", -} +# The SDK uses envelope encryption: one KMS API call generates a data key, +# then encrypts/decrypts locally. The cache reuses data keys across operations. 
+client = aws_encryption_sdk.EncryptionSDKClient( + commitment_policy=CommitmentPolicy.REQUIRE_ENCRYPT_REQUIRE_DECRYPT +) +key_provider = StrictAwsKmsMasterKeyProvider(key_ids=[os.environ["KMS_KEY_ARN"]]) +cache = LocalCryptoMaterialsCache(capacity=100) +cmm = CachingCryptoMaterialsManager( + master_key_provider=key_provider, + cache=cache, + max_age=300.0, + max_messages_encrypted=100, +) -@encryption.encrypt.json -async def encrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: - tenant_id = ctx.metadata.get("tenant_id") - if not tenant_id: - raise ValueError("tenant_id is required in encryption context") +SENSITIVE_FIELDS = {"messages", "email", "phone", "content"} - key_id = TENANT_KEYS.get(tenant_id) - if not key_id: - raise ValueError(f"No encryption key found for tenant: {tenant_id}") - encrypted_data = {} +@encryption.encrypt.blob +async def encrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: + ciphertext, _ = client.encrypt( + source=data, + materials_manager=cmm, + encryption_context={"tenant_id": ctx.metadata["tenant_id"]}, + ) + return ciphertext + + +@encryption.decrypt.blob +async def decrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: + plaintext, _ = client.decrypt(source=data, key_provider=key_provider) + return plaintext + + +@encryption.encrypt.json +async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: + result = {} for key, value in data.items(): - if key.startswith("my.customer.org/"): - response = kms_client.encrypt( - KeyId=key_id, - Plaintext=str(value).encode(), - EncryptionContext={'tenant_id': tenant_id, 'field': key} + if key in SENSITIVE_FIELDS and value is not None: + ciphertext, _ = client.encrypt( + source=str(value).encode(), + materials_manager=cmm, + encryption_context={"tenant_id": ctx.metadata["tenant_id"]}, ) - encrypted_data[key] = base64.b64encode( - response['CiphertextBlob'] - ).decode() + result[key] = base64.b64encode(ciphertext).decode() else: - encrypted_data[key] = value + 
result[key] = value + return result - return encrypted_data @encryption.decrypt.json -async def decrypt_json_data(ctx: EncryptionContext, data: dict) -> dict: - tenant_id = ctx.metadata.get("tenant_id") - if not tenant_id: - raise ValueError("tenant_id is required in encryption context") - - decrypted_data = {} +async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: + result = {} for key, value in data.items(): - if key.startswith("my.customer.org/") and isinstance(value, str): - ciphertext = base64.b64decode(value.encode()) - response = kms_client.decrypt( - CiphertextBlob=ciphertext, - EncryptionContext={'tenant_id': tenant_id, 'field': key} - ) - decrypted_data[key] = response['Plaintext'].decode() + if key in SENSITIVE_FIELDS and isinstance(value, str): + try: + ciphertext = base64.b64decode(value) + plaintext, _ = client.decrypt(source=ciphertext, key_provider=key_provider) + result[key] = plaintext.decode() + except Exception: + result[key] = value else: - decrypted_data[key] = value - - return decrypted_data + result[key] = value + return result ``` -## Related resources +The `encryption_context` is cryptographically bound to the ciphertext via KMS—decryption fails if the context doesn't match. The context is embedded in the ciphertext, so decrypt handlers don't need to reference `ctx.metadata`. + +### Key rotation + +KMS handles master key rotation automatically. When you enable automatic rotation on your KMS key, old encrypted data keys can still be decrypted while new operations use the rotated key material. No re-encryption of existing data is required. 
+ +## Related -- [Add custom authentication](/langsmith/custom-auth) - Similar decorator-based system for custom auth +- [Custom authentication](/langsmith/custom-auth) From 0afab9efcd35819c02b015247492025a2b963f41 Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Wed, 3 Dec 2025 12:46:13 -0800 Subject: [PATCH 3/9] warning regarding migration Signed-off-by: Connor Braa --- src/langsmith/custom-encryption.mdx | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/custom-encryption.mdx index e871581d8d..fcb1904330 100644 --- a/src/langsmith/custom-encryption.mdx +++ b/src/langsmith/custom-encryption.mdx @@ -89,6 +89,10 @@ JSON handlers encrypt structured data like thread metadata, assistant context, a **Encrypted fields cannot be searched or filtered.** Design your metadata schema so that fields you need to query remain unencrypted. + +**Migration required for existing data.** Unlike blob encryption, JSON handlers have no automatic way to detect whether a field value is already encrypted. If you enable JSON encryption on a system with existing unencrypted data, your decrypt handler may fail when it tries to decrypt plaintext values. Either migrate existing data before enabling encryption, or catch decryption errors and return the original value (see examples below). 
+ + Common fields to leave **unencrypted** for search and filtering: - `user_id`, `tenant_id`, `project_id` — for access control queries @@ -105,7 +109,7 @@ Encrypt fields containing **sensitive data**: ```python import os -from cryptography.fernet import Fernet +from cryptography.fernet import Fernet, InvalidToken from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() @@ -145,7 +149,8 @@ async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: if key in SENSITIVE_FIELDS and isinstance(value, str): try: result[key] = fernet.decrypt(value.encode()).decode() - except Exception: + except InvalidToken: + # Not encrypted (pre-migration data), return as-is result[key] = value else: result[key] = value @@ -253,14 +258,17 @@ For production deployments on AWS, use the [AWS Encryption SDK](https://docs.aws - Handles envelope encryption automatically (no manual key packing) - Provides key rotation and audit logging - Binds ciphertext to encryption context (tenant isolation) +- Caches data keys locally to avoid repeated KMS calls, latency and rate limits ### Complete example ```python import base64 import os +from binascii import Error as BinasciiError import aws_encryption_sdk +from aws_encryption_sdk.exceptions import NotSupportedError, SerializationError from aws_encryption_sdk import ( CachingCryptoMaterialsManager, CommitmentPolicy, @@ -271,7 +279,7 @@ from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() -# The SDK uses envelope encryption: one KMS API call generates a data key, +# The AWS Encryption SDK uses envelope encryption: one KMS API call generates a data key, # then encrypts/decrypts locally. The cache reuses data keys across operations. 
client = aws_encryption_sdk.EncryptionSDKClient( commitment_policy=CommitmentPolicy.REQUIRE_ENCRYPT_REQUIRE_DECRYPT @@ -329,7 +337,8 @@ async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: ciphertext = base64.b64decode(value) plaintext, _ = client.decrypt(source=ciphertext, key_provider=key_provider) result[key] = plaintext.decode() - except Exception: + except (BinasciiError, SerializationError, NotSupportedError): + # Not encrypted (pre-migration data), return as-is result[key] = value else: result[key] = value From 03be1c82badcdc432e24abfb62a2946c4bfdd4cb Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Wed, 3 Dec 2025 13:33:29 -0800 Subject: [PATCH 4/9] update with __encrypted__ approach for migration Signed-off-by: Connor Braa --- src/langsmith/custom-encryption.mdx | 85 +++++++++++++---------------- 1 file changed, 38 insertions(+), 47 deletions(-) diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/custom-encryption.mdx index fcb1904330..9e3c9c9b98 100644 --- a/src/langsmith/custom-encryption.mdx +++ b/src/langsmith/custom-encryption.mdx @@ -79,7 +79,7 @@ async def decrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: The `ctx.metadata` dict comes from the `X-Encryption-Context` header and is stored alongside encrypted data, so the correct key is used on decryption. -For production deployments with key rotation and audit logging, see [Envelope encryption with AWS KMS](#envelope-encryption-with-aws-kms). +For production deployments with key rotation and audit logging, see [Envelope encryption with AWS Encryption SDK](#envelope-encryption-with-aws-encryption-sdk). ## JSON encryption (metadata) @@ -89,9 +89,9 @@ JSON handlers encrypt structured data like thread metadata, assistant context, a **Encrypted fields cannot be searched or filtered.** Design your metadata schema so that fields you need to query remain unencrypted. 
- -**Migration required for existing data.** Unlike blob encryption, JSON handlers have no automatic way to detect whether a field value is already encrypted. If you enable JSON encryption on a system with existing unencrypted data, your decrypt handler may fail when it tries to decrypt plaintext values. Either migrate existing data before enabling encryption, or catch decryption errors and return the original value (see examples below). - + +**Migration consideration:** Unlike blob encryption, JSON handlers have no built-in way to detect whether a field value is already encrypted. We recommend storing all encrypted values under a single key (e.g., `__encrypted__`)—if the key exists, decrypt it; if not, data is unencrypted and passes through unchanged. The examples below use this pattern. + Common fields to leave **unencrypted** for search and filtering: @@ -108,8 +108,9 @@ Encrypt fields containing **sensitive data**: - Business-sensitive data ```python +import json import os -from cryptography.fernet import Fernet, InvalidToken +from cryptography.fernet import Fernet from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() @@ -120,6 +121,7 @@ TENANT_KEYS = { } SENSITIVE_FIELDS = {"messages", "email", "phone", "content"} +ENCRYPTED_KEY = "__encrypted__" def _get_fernet(ctx: EncryptionContext) -> Fernet: @@ -132,28 +134,23 @@ def _get_fernet(ctx: EncryptionContext) -> Fernet: @encryption.encrypt.json async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: fernet = _get_fernet(ctx) - result = {} - for key, value in data.items(): - if key in SENSITIVE_FIELDS and value is not None: - result[key] = fernet.encrypt(str(value).encode()).decode() - else: - result[key] = value + to_encrypt = {k: v for k, v in data.items() if k in SENSITIVE_FIELDS and v is not None} + if not to_encrypt: + return data + result = {k: v for k, v in data.items() if k not in SENSITIVE_FIELDS} + result[ENCRYPTED_KEY] = 
fernet.encrypt(json.dumps(to_encrypt).encode()).decode() return result @encryption.decrypt.json async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: + if ENCRYPTED_KEY not in data: + return data # Not encrypted, pass through unchanged fernet = _get_fernet(ctx) - result = {} - for key, value in data.items(): - if key in SENSITIVE_FIELDS and isinstance(value, str): - try: - result[key] = fernet.decrypt(value.encode()).decode() - except InvalidToken: - # Not encrypted (pre-migration data), return as-is - result[key] = value - else: - result[key] = value + encrypted_blob = data[ENCRYPTED_KEY] + decrypted = json.loads(fernet.decrypt(encrypted_blob.encode()).decode()) + result = {k: v for k, v in data.items() if k != ENCRYPTED_KEY} + result.update(decrypted) return result ``` @@ -264,11 +261,10 @@ For production deployments on AWS, use the [AWS Encryption SDK](https://docs.aws ```python import base64 +import json import os -from binascii import Error as BinasciiError import aws_encryption_sdk -from aws_encryption_sdk.exceptions import NotSupportedError, SerializationError from aws_encryption_sdk import ( CachingCryptoMaterialsManager, CommitmentPolicy, @@ -279,7 +275,7 @@ from langgraph_sdk import Encryption, EncryptionContext encryption = Encryption() -# The AWS Encryption SDK uses envelope encryption: one KMS API call generates a data key, +# The SDK uses envelope encryption: one KMS API call generates a data key, # then encrypts/decrypts locally. The cache reuses data keys across operations. 
client = aws_encryption_sdk.EncryptionSDKClient( commitment_policy=CommitmentPolicy.REQUIRE_ENCRYPT_REQUIRE_DECRYPT @@ -294,6 +290,7 @@ cmm = CachingCryptoMaterialsManager( ) SENSITIVE_FIELDS = {"messages", "email", "phone", "content"} +ENCRYPTED_KEY = "__encrypted__" @encryption.encrypt.blob @@ -314,34 +311,28 @@ async def decrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: @encryption.encrypt.json async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: - result = {} - for key, value in data.items(): - if key in SENSITIVE_FIELDS and value is not None: - ciphertext, _ = client.encrypt( - source=str(value).encode(), - materials_manager=cmm, - encryption_context={"tenant_id": ctx.metadata["tenant_id"]}, - ) - result[key] = base64.b64encode(ciphertext).decode() - else: - result[key] = value + to_encrypt = {k: v for k, v in data.items() if k in SENSITIVE_FIELDS and v is not None} + if not to_encrypt: + return data + ciphertext, _ = client.encrypt( + source=json.dumps(to_encrypt).encode(), + materials_manager=cmm, + encryption_context={"tenant_id": ctx.metadata["tenant_id"]}, + ) + result = {k: v for k, v in data.items() if k not in SENSITIVE_FIELDS} + result[ENCRYPTED_KEY] = base64.b64encode(ciphertext).decode() return result @encryption.decrypt.json async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: - result = {} - for key, value in data.items(): - if key in SENSITIVE_FIELDS and isinstance(value, str): - try: - ciphertext = base64.b64decode(value) - plaintext, _ = client.decrypt(source=ciphertext, key_provider=key_provider) - result[key] = plaintext.decode() - except (BinasciiError, SerializationError, NotSupportedError): - # Not encrypted (pre-migration data), return as-is - result[key] = value - else: - result[key] = value + if ENCRYPTED_KEY not in data: + return data # Not encrypted, pass through unchanged + ciphertext = base64.b64decode(data[ENCRYPTED_KEY]) + plaintext, _ = client.decrypt(source=ciphertext, 
key_provider=key_provider) + decrypted = json.loads(plaintext.decode()) + result = {k: v for k, v in data.items() if k != ENCRYPTED_KEY} + result.update(decrypted) return result ``` From afc19e26a9d81197a1ee2499e3f88aaadfd70630 Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Wed, 3 Dec 2025 13:43:06 -0800 Subject: [PATCH 5/9] user[tenant_id] Signed-off-by: Connor Braa --- src/langsmith/custom-encryption.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/custom-encryption.mdx index 9e3c9c9b98..0a46ae99df 100644 --- a/src/langsmith/custom-encryption.mdx +++ b/src/langsmith/custom-encryption.mdx @@ -208,7 +208,7 @@ encryption = Encryption() async def get_encryption_context(user: BaseUser, ctx: EncryptionContext) -> dict: return { **ctx.metadata, - "tenant_id": getattr(user, "tenant_id", "default"), + "tenant_id": user["tenant_id"], } ``` From c7ee0d86d721432758795e653993f2086e30826d Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Wed, 3 Dec 2025 16:45:15 -0800 Subject: [PATCH 6/9] polish list of fields to leave unencrypted Signed-off-by: Connor Braa --- src/langsmith/custom-encryption.mdx | 57 ++++++++++++++++------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/custom-encryption.mdx index 0a46ae99df..e535328457 100644 --- a/src/langsmith/custom-encryption.mdx +++ b/src/langsmith/custom-encryption.mdx @@ -95,17 +95,10 @@ JSON handlers encrypt structured data like thread metadata, assistant context, a Common fields to leave **unencrypted** for search and filtering: -- `user_id`, `tenant_id`, `project_id` — for access control queries -- `status`, `priority`, `type` — for filtering by state -- `tags`, `labels` — for categorization queries -- `created_by`, `owner` — for ownership lookups - -Encrypt fields containing **sensitive data**: - -- Conversation messages and content -- Personal information (email, phone, address) -- 
API keys, tokens, credentials -- Business-sensitive data +- User-defined fields for access control queries (e.g., `tenant_id`, `owner`) +- `run_id`, `thread_id`, `graph_id`, `assistant_id`, `user_id`, `checkpoint_id` — system-populated identifiers +- `source`, `step`, `parents`, `run_attempt` — system-populated execution state +- `langgraph_version`, `langgraph_api_version`, `langgraph_plan`, `langgraph_host`, `langgraph_api_url`, `langgraph_request_id`, `langgraph_auth_user`, `langgraph_auth_user_id`, `langgraph_auth_permissions` — system-populated platform metadata ```python import json @@ -120,7 +113,14 @@ TENANT_KEYS = { "tenant-b": Fernet(os.environ["TENANT_B_KEY"]), } -SENSITIVE_FIELDS = {"messages", "email", "phone", "content"} +SKIP_FIELDS = { + "tenant_id", "owner", + "run_id", "thread_id", "graph_id", "assistant_id", "user_id", "checkpoint_id", + "source", "step", "parents", "run_attempt", + "langgraph_version", "langgraph_api_version", "langgraph_plan", "langgraph_host", + "langgraph_api_url", "langgraph_request_id", "langgraph_auth_user", + "langgraph_auth_user_id", "langgraph_auth_permissions", +} ENCRYPTED_KEY = "__encrypted__" @@ -134,10 +134,10 @@ def _get_fernet(ctx: EncryptionContext) -> Fernet: @encryption.encrypt.json async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: fernet = _get_fernet(ctx) - to_encrypt = {k: v for k, v in data.items() if k in SENSITIVE_FIELDS and v is not None} + to_encrypt = {k: v for k, v in data.items() if k not in SKIP_FIELDS and v is not None} if not to_encrypt: return data - result = {k: v for k, v in data.items() if k not in SENSITIVE_FIELDS} + result = {k: v for k, v in data.items() if k in SKIP_FIELDS} result[ENCRYPTED_KEY] = fernet.encrypt(json.dumps(to_encrypt).encode()).decode() return result @@ -225,25 +225,23 @@ encryption = Encryption() @encryption.encrypt.json async def default_encrypt(ctx: EncryptionContext, data: dict) -> dict: - return encrypt_sensitive_fields(data) + return 
encrypt_with_skip_fields(data, SKIP_FIELDS) @encryption.encrypt.json.assistant async def encrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: if ctx.field == "context": - # Assistant context may contain API keys or system prompts - return encrypt_all_fields(data) - # Assistant metadata—selective encryption - return encrypt_sensitive_fields(data) + # Assistant context may contain API keys or system prompts—encrypt everything + return encrypt_with_skip_fields(data, skip_fields=set()) + # Assistant metadata—skip system fields + return encrypt_with_skip_fields(data, SKIP_FIELDS) @encryption.decrypt.json async def default_decrypt(ctx: EncryptionContext, data: dict) -> dict: - return decrypt_sensitive_fields(data) + return decrypt_encrypted_fields(data) @encryption.decrypt.json.assistant async def decrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: - if ctx.field == "context": - return decrypt_all_fields(data) - return decrypt_sensitive_fields(data) + return decrypt_encrypted_fields(data) ``` Supported model types: `thread`, `assistant`, `run`, `cron`, `checkpoint`. 
@@ -289,7 +287,14 @@ cmm = CachingCryptoMaterialsManager( max_messages_encrypted=100, ) -SENSITIVE_FIELDS = {"messages", "email", "phone", "content"} +SKIP_FIELDS = { + "tenant_id", "owner", + "run_id", "thread_id", "graph_id", "assistant_id", "user_id", "checkpoint_id", + "source", "step", "parents", "run_attempt", + "langgraph_version", "langgraph_api_version", "langgraph_plan", "langgraph_host", + "langgraph_api_url", "langgraph_request_id", "langgraph_auth_user", + "langgraph_auth_user_id", "langgraph_auth_permissions", +} ENCRYPTED_KEY = "__encrypted__" @@ -311,7 +316,7 @@ async def decrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: @encryption.encrypt.json async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: - to_encrypt = {k: v for k, v in data.items() if k in SENSITIVE_FIELDS and v is not None} + to_encrypt = {k: v for k, v in data.items() if k not in SKIP_FIELDS and v is not None} if not to_encrypt: return data ciphertext, _ = client.encrypt( @@ -319,7 +324,7 @@ async def encrypt_json(ctx: EncryptionContext, data: dict) -> dict: materials_manager=cmm, encryption_context={"tenant_id": ctx.metadata["tenant_id"]}, ) - result = {k: v for k, v in data.items() if k not in SENSITIVE_FIELDS} + result = {k: v for k, v in data.items() if k in SKIP_FIELDS} result[ENCRYPTED_KEY] = base64.b64encode(ciphertext).decode() return result From 37fbc1d67f6ea797f795da84dc07fc121474bd75 Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Thu, 4 Dec 2025 11:23:53 -0800 Subject: [PATCH 7/9] address josh's comments Signed-off-by: Connor Braa --- src/docs.json | 2 +- .../{custom-encryption.mdx => encryption.mdx} | 151 ++++++++++-------- 2 files changed, 89 insertions(+), 64 deletions(-) rename src/langsmith/{custom-encryption.mdx => encryption.mdx} (82%) diff --git a/src/docs.json b/src/docs.json index 6dc0272d4d..b4a334ea4d 100644 --- a/src/docs.json +++ b/src/docs.json @@ -1310,7 +1310,7 @@ "langsmith/custom-lifespan", "langsmith/custom-middleware", 
"langsmith/custom-routes", - "langsmith/custom-encryption", + "langsmith/encryption", "langsmith/configurable-headers", "langsmith/configurable-logs" ] diff --git a/src/langsmith/custom-encryption.mdx b/src/langsmith/encryption.mdx similarity index 82% rename from src/langsmith/custom-encryption.mdx rename to src/langsmith/encryption.mdx index e535328457..8e122b096b 100644 --- a/src/langsmith/custom-encryption.mdx +++ b/src/langsmith/encryption.mdx @@ -1,32 +1,57 @@ --- -title: Add custom at-rest encryption -sidebarTitle: Custom encryption +title: Add encryption at-rest +sidebarTitle: Encryption at-rest --- -Custom encryption lets you control how sensitive data is encrypted before it's stored in the database. +LangGraph Platform supports encryption at-rest for checkpoint data and metadata. You can choose between basic encryption with a single key or custom encryption for advanced use cases. -Custom encryption is available for **self-hosted** LangGraph Platform deployments only (Python graphs only). +Encryption at-rest is available for LangGraph Platform deployments (Python graphs only). -## When to use custom encryption +## Choosing an encryption method -For basic encryption with a single static key, set the `LANGGRAPH_AES_KEY` environment variable. LangGraph will automatically encrypt checkpoint data using AES. +| Method | What's encrypted | Use case | +|--------|------------------|----------| +| **Basic encryption** | Checkpoint blobs only | Single static key, automatic AES encryption | +| **Custom encryption** | Checkpoints + JSON fields | Per-tenant keys, KMS integration, selective field encryption | -Use **custom encryption** when you need: +## Basic encryption + +For simple encryption with a single static key, set the `LANGGRAPH_AES_KEY` environment variable. LangGraph will automatically encrypt checkpoint blobs using AES. + +1. 
Add `pycryptodome` to your dependencies in `langgraph.json`: + ```json + { + "dependencies": [".", "pycryptodome"], + "graphs": { + "agent": "./agent.py:graph" + } + } + ``` + +2. Set the `LANGGRAPH_AES_KEY` environment variable to a 16, 24, or 32-byte key (for AES-128, AES-192, or AES-256 respectively). + +Basic encryption only encrypts checkpoint blobs. Metadata fields remain unencrypted and searchable. + +## Custom encryption + +Use custom encryption when you need: - **Per-tenant key isolation** — different encryption keys for different customers - **KMS integration** — AWS KMS, Google Cloud KMS, or HashiCorp Vault for key management, rotation, and audit logging - **Selective field encryption** — encrypt sensitive metadata fields while keeping others searchable -## How it works +### How it works -1. Create an encryption module with handlers decorated with `@encryption.encrypt.blob`, `@encryption.decrypt.blob`, `@encryption.encrypt.json`, and `@encryption.decrypt.json` -2. Add the module path to `langgraph.json` -3. Pass encryption context (like tenant ID) via the `X-Encryption-Context` header +1. [Configure](#configuration) the encryption module path in `langgraph.json` +2. [Define your encryption module](#defining-your-encryption-module) with handlers for blob and JSON encryption +3. [Pass encryption context](#passing-encryption-context) (like tenant ID) via the `X-Encryption-Context` header 4. LangGraph calls your handlers before storing and after retrieving data -## Configuration +For production deployments with key rotation and audit logging, see [Envelope encryption with AWS Encryption SDK](#envelope-encryption-with-aws-encryption-sdk). 
+ +### Configuration Add your encryption module to `langgraph.json`: @@ -42,9 +67,11 @@ Add your encryption module to `langgraph.json`: } ``` -## Blob encryption (checkpoints) +### Defining your encryption module + +#### Blob encryption (checkpoints) -Blob handlers encrypt checkpoint data—the serialized state from graph execution. Here's a simplified example using per-tenant keys: +Blob handlers encrypt checkpoint data—the serialized state from graph execution. Here's a simplified example using per-tenant keys with [Fernet](https://cryptography.io/en/latest/fernet/) (a symmetric encryption scheme from the `cryptography` library): ```python import os @@ -79,9 +106,7 @@ async def decrypt_blob(ctx: EncryptionContext, data: bytes) -> bytes: The `ctx.metadata` dict comes from the `X-Encryption-Context` header and is stored alongside encrypted data, so the correct key is used on decryption. -For production deployments with key rotation and audit logging, see [Envelope encryption with AWS Encryption SDK](#envelope-encryption-with-aws-encryption-sdk). - -## JSON encryption (metadata) +#### JSON encryption (metadata) JSON handlers encrypt structured data like thread metadata, assistant context, and run kwargs. Unlike blob encryption, you choose which fields to encrypt—keeping some unencrypted for search and filtering. @@ -154,7 +179,7 @@ async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: return result ``` -### What gets encrypted +##### What gets encrypted **JSON handlers** (`@encryption.encrypt.json` / `@encryption.decrypt.json`): @@ -167,34 +192,39 @@ async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: - Checkpoint blobs (graph execution state) -## Passing encryption context +#### Model-specific handlers -Pass encryption context via the `X-Encryption-Context` header. The context is available in your handlers as `ctx.metadata` and is stored alongside encrypted data for use during decryption. 
+Register different handlers for different model types using `@encryption.encrypt.json.assistant`, `@encryption.encrypt.json.run`, etc. Use `ctx.field` to vary behavior by field: ```python -import base64 -import json -from langgraph_sdk import get_client +from langgraph_sdk import Encryption, EncryptionContext -encryption_context = base64.b64encode( - json.dumps({"tenant_id": "tenant-a"}).encode() -).decode() +encryption = Encryption() -client = get_client(url="http://localhost:2024") +@encryption.encrypt.json +async def default_encrypt(ctx: EncryptionContext, data: dict) -> dict: + return encrypt_with_skip_fields(data, SKIP_FIELDS) -result = await client.runs.wait( - thread_id=None, - assistant_id="agent", - input={"messages": [{"role": "user", "content": "Hello"}]}, - headers={"X-Encryption-Context": encryption_context}, -) +@encryption.encrypt.json.assistant +async def encrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: + if ctx.field == "context": + # Assistant context may contain API keys or system prompts—encrypt everything + return encrypt_with_skip_fields(data, skip_fields=set()) + # Assistant metadata—skip system fields + return encrypt_with_skip_fields(data, SKIP_FIELDS) + +@encryption.decrypt.json +async def default_decrypt(ctx: EncryptionContext, data: dict) -> dict: + return decrypt_encrypted_fields(data) + +@encryption.decrypt.json.assistant +async def decrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: + return decrypt_encrypted_fields(data) ``` - -The encryption context is stored with encrypted data. On decryption, it's automatically restored—callers don't need to pass the header when reading. - +Supported model types: `thread`, `assistant`, `run`, `cron`, `checkpoint`. 
-## Deriving context from authentication +#### Deriving context from authentication Instead of passing `X-Encryption-Context` explicitly, derive encryption context from the authenticated user: @@ -214,39 +244,34 @@ async def get_encryption_context(user: BaseUser, ctx: EncryptionContext) -> dict This handler runs once per request after authentication. The returned dict becomes `ctx.metadata` for all encryption operations in that request. -## Model-specific handlers +### Passing encryption context -Register different handlers for different model types using `@encryption.encrypt.json.assistant`, `@encryption.encrypt.json.run`, etc. Use `ctx.field` to vary behavior by field: +Pass encryption context via the `X-Encryption-Context` header. The context is arbitrary data that you define—you control the schema and can include any fields your encryption logic needs (e.g., `tenant_id`, `key_version`). The context is available in your handlers as `ctx.metadata` and is stored alongside encrypted data for use during decryption. 
```python -from langgraph_sdk import Encryption, EncryptionContext - -encryption = Encryption() - -@encryption.encrypt.json -async def default_encrypt(ctx: EncryptionContext, data: dict) -> dict: - return encrypt_with_skip_fields(data, SKIP_FIELDS) +import base64 +import json +from langgraph_sdk import get_client -@encryption.encrypt.json.assistant -async def encrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: - if ctx.field == "context": - # Assistant context may contain API keys or system prompts—encrypt everything - return encrypt_with_skip_fields(data, skip_fields=set()) - # Assistant metadata—skip system fields - return encrypt_with_skip_fields(data, SKIP_FIELDS) +encryption_context = base64.b64encode( + json.dumps({"tenant_id": "tenant-a"}).encode() +).decode() -@encryption.decrypt.json -async def default_decrypt(ctx: EncryptionContext, data: dict) -> dict: - return decrypt_encrypted_fields(data) +client = get_client(url="http://localhost:2024") -@encryption.decrypt.json.assistant -async def decrypt_assistant(ctx: EncryptionContext, data: dict) -> dict: - return decrypt_encrypted_fields(data) +result = await client.runs.wait( + thread_id=None, + assistant_id="agent", + input={"messages": [{"role": "user", "content": "Hello"}]}, + headers={"X-Encryption-Context": encryption_context}, +) ``` -Supported model types: `thread`, `assistant`, `run`, `cron`, `checkpoint`. + +The encryption context is stored with encrypted data. On decryption, it's automatically restored—callers don't need to pass the header when reading. + -## Envelope encryption with AWS Encryption SDK +### Envelope encryption with AWS Encryption SDK For production deployments on AWS, use the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/python.html) with AWS KMS, or an equivalent within your cloud provider. 
This approach: @@ -255,7 +280,7 @@ For production deployments on AWS, use the [AWS Encryption SDK](https://docs.aws - Binds ciphertext to encryption context (tenant isolation) - Caches data keys locally to avoid repeated KMS calls, latency and rate limits -### Complete example +#### Complete example ```python import base64 @@ -343,7 +368,7 @@ async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: The `encryption_context` is cryptographically bound to the ciphertext via KMS—decryption fails if the context doesn't match. The context is embedded in the ciphertext, so decrypt handlers don't need to reference `ctx.metadata`. -### Key rotation +#### Key rotation KMS handles master key rotation automatically. When you enable automatic rotation on your KMS key, old encrypted data keys can still be decrypted while new operations use the rotated key material. No re-encryption of existing data is required. From 6781cbdbd108598db5ac8a9effe6551780d714cc Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Thu, 4 Dec 2025 11:27:05 -0800 Subject: [PATCH 8/9] fix what gets encrypted header nesting Signed-off-by: Connor Braa --- src/langsmith/encryption.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/langsmith/encryption.mdx b/src/langsmith/encryption.mdx index 8e122b096b..562e5286d8 100644 --- a/src/langsmith/encryption.mdx +++ b/src/langsmith/encryption.mdx @@ -179,7 +179,7 @@ async def decrypt_json(ctx: EncryptionContext, data: dict) -> dict: return result ``` -##### What gets encrypted +#### What gets encrypted **JSON handlers** (`@encryption.encrypt.json` / `@encryption.decrypt.json`): From 35cd9e24da73d6d2b1725bd4b9bdb7d501fc1b92 Mon Sep 17 00:00:00 2001 From: Connor Braa Date: Thu, 4 Dec 2025 15:27:51 -0800 Subject: [PATCH 9/9] document aes->custom migration behavior Signed-off-by: Connor Braa --- src/langsmith/encryption.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/langsmith/encryption.mdx b/src/langsmith/encryption.mdx 
index 562e5286d8..a37d6b7a57 100644 --- a/src/langsmith/encryption.mdx +++ b/src/langsmith/encryption.mdx @@ -67,6 +67,10 @@ Add your encryption module to `langgraph.json`: } ``` + +If you're already using `LANGGRAPH_AES_KEY`, keep it configured—custom encryption replaces AES for new writes, but existing AES-encrypted data will still be readable. + + ### Defining your encryption module #### Blob encryption (checkpoints)