近端算子求解分布式约束优化问题

问题

mini=1nw12peifh(θi)pbi2+w22θiθi2s.t.L(pepe)=0,peiPi,θiΘi(1)\begin{aligned} \min &\quad \sum_{i=1}^n\frac{w_1}{2}\|p_{e_i}-f_h(\theta_i)-p_{b_i}^*\|^2+\frac{w_2}{2}\|\theta_i-\theta_i^*\|^2 \\ s.t.&\quad L(p_e-p_e^*)=0,\\ &\quad p_{e_i}\in \mathcal P_i,\theta_i\in\Theta_i \end{aligned}\qquad (1)

近端算子

proxf(v)=argminxf(x)+12xv2\operatorname{prox}_f(v)=\arg\min_x f(x)+\frac{1}{2}\|x-v\|^2

可知x=proxf(v)x=\operatorname{prox}_f(v)等价于vxf(x)v-x\in\nabla f(x)​。

紧凑形式

minf1(pe,θ)+f2(θ)+g1(pe)+g2(θ)s.t.L(pepe)=0(2)\begin{aligned} \min&\quad f_1(p_e,\theta)+f_2(\theta)+g_1(p_e)+g_2(\theta)\\ s.t.&\quad L(p_e-p_e^*)=0 \end{aligned}\qquad(2)

拉格朗日函数

L(pe,θ,λ)=f1(pe,θ)+f2(θ)+λTL(pepe)+g1(pe)+g2(θ)(3)\mathcal L(p_e,\theta,\lambda)=f_1(p_e,\theta)+f_2(\theta)+\lambda^TL(p_e-p_e^*)+g_1(p_e)+g_2(\theta)\qquad (3)

根据KKT条件

0pef1(p^e,θ^)Lλ^g1(p^e)0θf1(p^e,θ^)f2(θ^)g2(θ^)0=L(p^epe)(4)\begin{aligned} 0&\in-\nabla_{p_e}f_1(\hat p_e,\hat \theta)-L\hat\lambda-\partial g_1(\hat p_e)\\ 0&\in-\nabla_{\theta} f_1(\hat p_e,\hat \theta)-\nabla f_2(\hat \theta)-\partial g_2(\hat \theta)\\ 0&=L(\hat p_e-p_e^*) \end{aligned}\qquad (4)

等价于

0=proxg1(p^epef1(p^e,θ^)Lλ^)p^e0=proxg2(θ^θf1(p^e,θ^)f2(θ^))θ^0=L(p^epe)(5)\begin{aligned} 0&=\operatorname{prox}_{g_1}(\hat p_e-\nabla_{p_e}f_1(\hat p_e,\hat \theta)-L\hat\lambda)-\hat p_e\\ 0&=\operatorname{prox}_{g_2}(\hat \theta-\nabla_{\theta} f_1(\hat p_e,\hat \theta)-\nabla f_2(\hat \theta))-\hat \theta\\ 0&=L(\hat p_e-p_e^*) \end{aligned}\qquad (5)

算法

p˙e=proxg1(pepef1(pe,θ)Lλ)peθ˙=proxg2(θθf1(pe,θ)f2(θ))θλ˙=L(pe+p˙epe)(6)\begin{aligned} \dot p_e&=\operatorname{prox}_{g_1}(p_e-\nabla_{p_e}f_1(p_e,\theta)-L\lambda)-p_e\\ \dot \theta&=\operatorname{prox}_{g_2}(\theta-\nabla_{\theta} f_1(p_e,\theta)-\nabla f_2(\theta))-\theta\\ \dot \lambda&=L(p_e+\dot p_e-p_e^*) \end{aligned}\qquad (6)

定理:假设图无向连通,且Slater条件满足。则

  1. (6)的平衡点李雅普诺夫稳定,解(pe(t),θ(t))(p_e(t),\theta(t))有界;
  2. (pe(t),θ(t),λ(t))(p_e(t),\theta(t),\lambda(t))收敛,且limt(pe(t),θ(t))\lim_{t\to\infty}(p_e(t),\theta(t))是问题(2)的解。

证明:1、李雅普诺夫函数

V=12pep^e2+12θθ^2+12λλ^2+f2(θ)f2(θ^)(θθ^)Tf2(θ^)+f1(pe,θ)f1(p^e,θ^)(pep^e)Tpef1(p^e,θ^)(θθ^)Tθf1(p^e,θ^)\begin{aligned} V&=\frac{1}{2}\|p_e-\hat p_e\|^2+\frac{1}{2}\|\theta-\hat \theta\|^2+\frac{1}{2}\|\lambda-\hat \lambda\|^2+f_2(\theta)-f_2(\hat\theta)-(\theta-\hat \theta)^T\nabla f_2(\hat \theta)\\ &\quad+f_1(p_e,\theta)-f_1(\hat p_e,\hat\theta)-(p_e-\hat p_e)^T\nabla_{p_e}f_1(\hat p_e,\hat \theta)-(\theta-\hat \theta)^T\nabla_{\theta}f_1(\hat p_e,\hat \theta) \end{aligned}

(6)的前两个等式可以重写为

p˙e+pe=proxg1(pepef1(pe,θ)Lλ)θ˙+θ=proxg2(θθf1(pe,θ)f2(θ))\begin{aligned} \dot p_e+p_e&=\operatorname{prox}_{g_1}(p_e-\nabla_{p_e}f_1(p_e,\theta)-L\lambda)\\ \dot \theta+\theta&=\operatorname{prox}_{g_2}(\theta-\nabla_{\theta} f_1(p_e,\theta)-\nabla f_2(\theta)) \end{aligned}

x=proxf(v)x=\operatorname{prox}_f(v)等价于vxf(x)v-x\in\nabla f(x)可得

pepef1(pe,θ)Lλ(p˙e+pe)g1(p˙e+pe)θθf1(pe,θ)f2(θ)(θ˙+θ)g2(θ˙+θ)(7)\begin{aligned} p_e-\nabla_{p_e}f_1(p_e,\theta)-L\lambda-(\dot p_e+p_e)&\in\partial g_1(\dot p_e+p_e)\\ \theta-\nabla_{\theta} f_1(p_e,\theta)-\nabla f_2(\theta)-(\dot \theta+\theta)&\in\partial g_2(\dot \theta+\theta)\\ \end{aligned}\qquad (7)

(p^e,θ^,λ^)(\hat p_e,\hat \theta,\hat \lambda)​为(6)的平衡点。则由(5)可得

pef1(p^e,θ^)Lλ^g1(p^e)θf1(p^e,θ^)f2(θ^)g2(θ^)(8)\begin{aligned} -\nabla_{p_e}f_1(\hat p_e,\hat \theta)-L\hat \lambda&\in\partial g_1(\hat p_e)\\ -\nabla_{\theta} f_1(\hat p_e,\hat \theta)-\nabla f_2(\hat \theta)&\in\partial g_2(\hat \theta)\\ \end{aligned}\qquad (8)

因为g1,g2g_1,g_2是凸的,所以g1,g2\partial g_1,\partial g_2​单调。结合(7)(8)有

((pef1(pe,θ)pef1(p^e,θ^))L(λλ^)p˙e)T(p˙e+pep^e)0(9)((θf1(pe,θ)θf1(p^e,θ^))(f2(θ)f2(θ^))θ˙)T(θ˙+θθ^)0(10)\begin{aligned} (-(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))-L(\lambda-\hat \lambda)-\dot p_e)^T(\dot p_e+p_e-\hat p_e)&\geq 0\qquad (9)\\ (-(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))-(\nabla f_2(\theta)-\nabla f_2(\hat \theta))-\dot \theta)^T(\dot \theta+\theta-\hat \theta)&\geq 0\qquad (10) \end{aligned}

由平衡点定义(5),得L(p^epe)=0L(\hat p_e-p_e^*)=0

由(9)得

(pef1(pe,θ)pef1(p^e,θ^))T(pep^e)(λλ^)TL(pe+p˙epe)p˙eTp˙ep˙eT(pep^e)+p˙eT(pef1(pe,θ)pef1(p^e,θ^))\begin{aligned} &-(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))^T(p_e-\hat p_e)-(\lambda-\hat \lambda)^TL(p_e+\dot p_e-p_e^*)-\dot p_e^T\dot p_e\\ &\geq \dot p_e^T(p_e-\hat p_e)+\dot p_e^T(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta)) \end{aligned}

同理,由(10)得

(θf1(pe,θ)θf1(p^e,θ^))T(θθ^)(f2(θ)f2(θ^))T(θθ^)θ˙Tθ˙θ˙T(θθ^)+θ˙T(θf1(pe,θ)θf1(p^e,θ^))+θ˙T(f2(θ)f2(θ^))\begin{aligned} &-(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))^T(\theta-\hat \theta)-(\nabla f_2(\theta)-\nabla f_2(\hat \theta))^T(\theta-\hat \theta)-\dot \theta^T\dot \theta\\ &\geq \dot \theta^T(\theta-\hat \theta)+\dot \theta^T(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))+\dot \theta^T(\nabla f_2(\theta)-\nabla f_2(\hat \theta)) \end{aligned}

此外,

λ˙T(λλ^)=(λλ^)TL(pe+p˙epe)\dot \lambda^T(\lambda-\hat \lambda)=(\lambda-\hat \lambda)^TL(p_e+\dot p_e-p_e^*)

综上所述,李雅普诺夫函数对时间的导数为

V˙=p˙eT(pep^e)+θ˙T(θθ^)+λ˙T(λλ^)+p˙eT(pef1(pe,θ)pef1(p^e,θ^))+θ˙T(θf1(pe,θ)θf1(p^e,θ^))+θ˙T(f2(θ)f2(θ^))p˙eTp˙eθ˙Tθ˙(pef1(pe,θ)pef1(p^e,θ^))T(pep^e)(θf1(pe,θ)θf1(p^e,θ^))T(θθ^)(f2(θ)f2(θ^))T(θθ^)p˙e2θ˙20\begin{aligned} \dot V&=\dot p_e^T(p_e-\hat p_e)+\dot \theta^T(\theta-\hat \theta)+\dot \lambda^T(\lambda-\hat \lambda)+\dot p_e^T(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))\\ &\quad +\dot \theta^T(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))+\dot \theta^T(\nabla f_2(\theta)-\nabla f_2(\hat \theta))\\ &\leq -\dot p_e^T\dot p_e-\dot \theta^T\dot \theta-(\nabla_{p_e}f_1(p_e,\theta)-\nabla_{p_e}f_1(\hat p_e,\hat \theta))^T(p_e-\hat p_e)\\ &\quad-(\nabla_{\theta} f_1(p_e,\theta)-\nabla_{\theta} f_1(\hat p_e,\hat \theta))^T(\theta-\hat \theta)-(\nabla f_2(\theta)-\nabla f_2(\hat \theta))^T(\theta-\hat \theta)\\ &\leq-\|\dot p_e\|^2-\|\dot \theta\|^2\leq 0 \end{aligned}

因为f1,f2f_1,f_2是凸的,所以f1,f2\nabla f_1,\nabla f_2单调,故第二个不等式成立。

因此(pe(t),θ(t),λ(t))M(p_e(t),\theta(t),\lambda(t))\to \mathcal M​,其中M\mathcal M​是{(pe,θ,λ)p˙e=0,θ˙=0}\{(p_e,\theta,\lambda)| \dot p_e=0,\dot \theta=0 \}​中的最大不变集。对于任何起始于M\mathcal M​的解轨迹(pˉe(t),θˉ(t),λˉ(t))(\bar p_e(t),\bar \theta(t),\bar \lambda(t))​,取(pˉe(0),θˉ(0),λˉ(0))M(\bar p_e(0),\bar \theta(0),\bar \lambda(0))\in\mathcal M​,都满足pˉe˙(t)0\dot {\bar p_e}(t)\equiv 0θˉ˙(t)0\dot {\bar \theta}(t)\equiv0,代入(5)得

λˉ˙(t)L(pˉepe)\dot {\bar \lambda}(t)\equiv L(\bar p_e-p_e^*)

那么λˉ˙(t)0\dot {\bar \lambda}(t)\equiv 0,否则λˉ(t)\bar \lambda(t)趋于无穷大,与稳定性矛盾。因此M{(pe,θ,λ)p˙e=0,θ˙=0,λ˙=0}\mathcal M\subset\{(p_e,\theta,\lambda)| \dot p_e=0,\dot \theta=0,\dot \lambda=0 \},即M\mathcal M里的每一个点都是平衡点。然后又有每个平衡点都李雅普诺夫稳定,故(pe,θ,λ)(p_e,\theta,\lambda)最终收敛到一个平衡点。