Просмотр исходного кода

fix(db): prevent deadlock in concurrent melt quote locking (#1474)

When multiple wallets attempt to melt quotes sharing the same
request_lookup_id concurrently, the previous two-step locking approach
(lock target quote, then lock all related quotes) could cause deadlocks.

Each transaction would hold its own quote's lock while waiting for locks
held by other transactions, creating a circular wait condition that
PostgreSQL would resolve by aborting transactions.

Fix by introducing lock_melt_quote_and_related() which acquires all
related quote locks atomically in a single query, ordered by ID. This
ensures consistent lock ordering across transactions, eliminating the
circular wait condition.
tsk 4 недель назад
Родитель
Сommit
c99550f1c9

+ 42 - 0
crates/cdk-common/src/database/mint/mod.rs

@@ -41,6 +41,18 @@ pub struct MeltRequestInfo {
     pub change_outputs: Vec<BlindedMessage>,
     pub change_outputs: Vec<BlindedMessage>,
 }
 }
 
 
+/// Result of locking a melt quote and all related quotes atomically.
+///
+/// This struct is returned by [`QuotesTransaction::lock_melt_quote_and_related`]
+/// and contains both the target quote and all quotes sharing the same `request_lookup_id`.
+#[derive(Debug)]
+pub struct LockedMeltQuotes {
+    /// The target quote that was requested, if found
+    pub target: Option<Acquired<MeltQuote>>,
+    /// All quotes sharing the same `request_lookup_id` (including the target)
+    pub all_related: Vec<Acquired<MeltQuote>>,
+}
+
 /// KeysDatabaseWriter
 /// KeysDatabaseWriter
 #[async_trait]
 #[async_trait]
 pub trait KeysDatabaseTransaction<'a, Error>: DbTransactionFinalizer<Err = Error> {
 pub trait KeysDatabaseTransaction<'a, Error>: DbTransactionFinalizer<Err = Error> {
@@ -177,6 +189,36 @@ pub trait QuotesTransaction {
         request_lookup_id: &PaymentIdentifier,
         request_lookup_id: &PaymentIdentifier,
     ) -> Result<Vec<Acquired<MeltQuote>>, Self::Err>;
     ) -> Result<Vec<Acquired<MeltQuote>>, Self::Err>;
 
 
+    /// Locks a melt quote and all related quotes sharing the same request_lookup_id atomically.
+    ///
+    /// This method prevents deadlocks by acquiring all locks in a single query with consistent
+    /// ordering, rather than locking the target quote first and then related quotes separately.
+    ///
+    /// # Deadlock Prevention
+    ///
+    /// When multiple transactions try to melt quotes sharing the same `request_lookup_id`,
+    /// acquiring locks in two steps (first the target quote, then all related quotes) can cause
+    /// circular wait deadlocks. This method avoids that by:
+    /// 1. Using a subquery to find the `request_lookup_id` for the target quote
+    /// 2. Locking ALL quotes with that `request_lookup_id` in one atomic operation
+    /// 3. Ordering locks consistently by quote ID
+    ///
+    /// # Arguments
+    ///
+    /// * `quote_id` - The ID of the target melt quote
+    ///
+    /// # Returns
+    ///
+    /// A [`LockedMeltQuotes`] containing:
+    /// - `target`: The target quote (if found)
+    /// - `all_related`: All quotes sharing the same `request_lookup_id` (including the target)
+    ///
+    /// If the quote has no `request_lookup_id`, only the target quote is returned and locked.
+    async fn lock_melt_quote_and_related(
+        &mut self,
+        quote_id: &QuoteId,
+    ) -> Result<LockedMeltQuotes, Self::Err>;
+
     /// Updates the request lookup id for a melt quote.
     /// Updates the request lookup id for a melt quote.
     ///
     ///
     /// Requires an [`Acquired`] melt quote to ensure the row is locked before modification.
     /// Requires an [`Acquired`] melt quote to ensure the row is locked before modification.

+ 78 - 1
crates/cdk-sql-common/src/mint/mod.rs

@@ -16,7 +16,8 @@ use std::sync::Arc;
 use async_trait::async_trait;
 use async_trait::async_trait;
 use bitcoin::bip32::DerivationPath;
 use bitcoin::bip32::DerivationPath;
 use cdk_common::database::mint::{
 use cdk_common::database::mint::{
-    CompletedOperationsDatabase, CompletedOperationsTransaction, SagaDatabase, SagaTransaction,
+    CompletedOperationsDatabase, CompletedOperationsTransaction, LockedMeltQuotes, SagaDatabase,
+    SagaTransaction,
 };
 };
 use cdk_common::database::{
 use cdk_common::database::{
     self, Acquired, ConversionError, DbTransactionFinalizer, Error, MintDatabase,
     self, Acquired, ConversionError, DbTransactionFinalizer, Error, MintDatabase,
@@ -702,6 +703,75 @@ where
         .collect::<Result<Vec<_>, _>>()
         .collect::<Result<Vec<_>, _>>()
 }
 }
 
 
+/// Locks a melt quote and all related quotes atomically to prevent deadlocks.
+///
+/// This function acquires all locks in a single query with consistent ordering (by ID),
+/// preventing the circular wait condition that can occur when locks are acquired in
+/// separate queries.
+#[inline]
+async fn lock_melt_quote_and_related_inner<T>(
+    executor: &T,
+    quote_id: &QuoteId,
+) -> Result<LockedMeltQuotes, Error>
+where
+    T: DatabaseExecutor,
+{
+    // Use a single query with subquery to atomically lock:
+    // 1. All quotes with the same request_lookup_id as the target quote, OR
+    // 2. Just the target quote if it has no request_lookup_id
+    //
+    // The ORDER BY ensures consistent lock acquisition order across transactions,
+    // preventing deadlocks.
+    let query_str = r#"
+        SELECT
+            id,
+            unit,
+            amount,
+            request,
+            fee_reserve,
+            expiry,
+            state,
+            payment_preimage,
+            request_lookup_id,
+            created_time,
+            paid_time,
+            payment_method,
+            options,
+            request_lookup_id_kind
+        FROM
+            melt_quote
+        WHERE
+            (
+                request_lookup_id IS NOT NULL
+                AND request_lookup_id = (SELECT request_lookup_id FROM melt_quote WHERE id = :quote_id)
+                AND request_lookup_id_kind = (SELECT request_lookup_id_kind FROM melt_quote WHERE id = :quote_id)
+            )
+            OR
+            (
+                id = :quote_id
+                AND (SELECT request_lookup_id FROM melt_quote WHERE id = :quote_id) IS NULL
+            )
+        ORDER BY id
+        FOR UPDATE
+        "#;
+
+    let all_quotes: Vec<mint::MeltQuote> = query(query_str)?
+        .bind("quote_id", quote_id.to_string())
+        .fetch_all(executor)
+        .await?
+        .into_iter()
+        .map(sql_row_to_melt_quote)
+        .collect::<Result<Vec<_>, _>>()?;
+
+    // Find the target quote from the locked set
+    let target_quote = all_quotes.iter().find(|q| &q.id == quote_id).cloned();
+
+    Ok(LockedMeltQuotes {
+        target: target_quote.map(|q| q.into()),
+        all_related: all_quotes.into_iter().map(|q| q.into()).collect(),
+    })
+}
+
 #[async_trait]
 #[async_trait]
 impl<RM> MintKeyDatabaseTransaction<'_, Error> for SQLTransaction<RM>
 impl<RM> MintKeyDatabaseTransaction<'_, Error> for SQLTransaction<RM>
 where
 where
@@ -1298,6 +1368,13 @@ where
             .map(|quote| quote.into_iter().map(|inner| inner.into()).collect())
             .map(|quote| quote.into_iter().map(|inner| inner.into()).collect())
     }
     }
 
 
+    async fn lock_melt_quote_and_related(
+        &mut self,
+        quote_id: &QuoteId,
+    ) -> Result<LockedMeltQuotes, Self::Err> {
+        lock_melt_quote_and_related_inner(&self.inner, quote_id).await
+    }
+
     async fn get_mint_quote_by_request(
     async fn get_mint_quote_by_request(
         &mut self,
         &mut self,
         request: &str,
         request: &str,

+ 25 - 32
crates/cdk/src/mint/melt/shared.rs

@@ -247,19 +247,23 @@ pub async fn process_melt_change(
 /// Loads a melt quote and acquires exclusive locks on all related quotes.
 /// Loads a melt quote and acquires exclusive locks on all related quotes.
 ///
 ///
 /// This function combines quote loading with defensive locking to prevent race conditions in BOLT12
 /// This function combines quote loading with defensive locking to prevent race conditions in BOLT12
-/// scenarios where multiple melt quotes can share the same `request_lookup_id`. It performs three
-/// operations atomically:
+/// scenarios where multiple melt quotes can share the same `request_lookup_id`. It performs the
+/// following operations atomically in a single query:
 ///
 ///
-/// 1. Loads the melt quote by ID 2. Acquires row-level locks on all sibling quotes sharing the same
-///    lookup identifier 3. Validates that no sibling quote is already in `Pending` or `Paid` state
+/// 1. Acquires row-level locks on ALL quotes sharing the same lookup identifier (including target)
+/// 2. Returns the target quote and validates no sibling is already `Pending` or `Paid`
 ///
 ///
-/// This ensures that when a melt operation begins, no other concurrent melt can proceed on a
-/// related quote, preventing double-spending and state inconsistencies.
+/// # Deadlock Prevention
+///
+/// This function uses a single atomic query to lock all related quotes at once, ordered by ID.
+/// This prevents deadlocks that would occur if we locked the target quote first, then tried to
+/// lock related quotes separately - concurrent transactions would each hold one lock and wait
+/// for the other, creating a circular wait condition.
 ///
 ///
 /// # Arguments
 /// # Arguments
 ///
 ///
-/// * `tx` - The active database transaction used to load and acquire locks. * `quote_id` - The ID
-///   of the melt quote to load and process.
+/// * `tx` - The active database transaction used to load and acquire locks.
+/// * `quote_id` - The ID of the melt quote to load and process.
 ///
 ///
 /// # Returns
 /// # Returns
 ///
 ///
@@ -267,41 +271,30 @@ pub async fn process_melt_change(
 ///
 ///
 /// # Errors
 /// # Errors
 ///
 ///
-/// * [`Error::UnknownQuote`] if no quote exists with the given ID. * [`Error::Database(Duplicate)`]
-///   if another quote with the same lookup ID is already pending or paid, indicating a conflicting
-///   concurrent melt operation.
+/// * [`Error::UnknownQuote`] if no quote exists with the given ID.
+/// * [`Error::Database(Duplicate)`] if another quote with the same lookup ID is already pending
+///   or paid, indicating a conflicting concurrent melt operation.
 pub async fn load_melt_quotes_exclusively(
 pub async fn load_melt_quotes_exclusively(
     tx: &mut Box<dyn database::MintTransaction<database::Error> + Send + Sync>,
     tx: &mut Box<dyn database::MintTransaction<database::Error> + Send + Sync>,
     quote_id: &QuoteId,
     quote_id: &QuoteId,
 ) -> Result<Acquired<MeltQuote>, Error> {
 ) -> Result<Acquired<MeltQuote>, Error> {
-    let quote = tx
-        .get_melt_quote(quote_id)
+    // Lock ALL related quotes in a single atomic query to prevent deadlocks.
+    // The query locks quotes ordered by ID, ensuring consistent lock acquisition order
+    // across concurrent transactions.
+    let locked = tx
+        .lock_melt_quote_and_related(quote_id)
         .await
         .await
         .map_err(|e| match e {
         .map_err(|e| match e {
             database::Error::Locked => {
             database::Error::Locked => {
-                tracing::warn!("Quote {quote_id} is locked by another process");
+                tracing::warn!("Quote {quote_id} or related quotes are locked by another process");
                 database::Error::Duplicate
                 database::Error::Duplicate
             }
             }
             e => e,
             e => e,
-        })?
-        .ok_or(Error::UnknownQuote)?;
-
-    // Lock any other quotes so they cannot be modified
-    let locked_quotes = if let Some(request_lookup_id) = quote.request_lookup_id.as_ref() {
-        tx.get_melt_quotes_by_request_lookup_id(request_lookup_id)
-            .await
-            .map_err(|e| match e {
-                database::Error::Locked => {
-                    tracing::warn!("Quotes with request_lookyup_id {request_lookup_id} is locked by another process");
-                    database::Error::Duplicate
-                }
-                e => e,
-            })?
-    } else {
-        vec![]
-    };
+        })?;
+
+    let quote = locked.target.ok_or(Error::UnknownQuote)?;
 
 
-    if locked_quotes.iter().any(|locked_quote| {
+    if locked.all_related.iter().any(|locked_quote| {
         locked_quote.id != quote.id
         locked_quote.id != quote.id
             && (locked_quote.state == MeltQuoteState::Pending
             && (locked_quote.state == MeltQuoteState::Pending
                 || locked_quote.state == MeltQuoteState::Paid)
                 || locked_quote.state == MeltQuoteState::Paid)