A neural attention model for sentence summarization

Post on 15-Apr-2017

55 views 0 download

transcript

x

$M$: $x_1, \ldots, x_M$

$y_1, \ldots, y_N$; $y$; $N$

$y$ ($N < M$)

$\arg\max_{y \in \mathcal{Y}} s(x, y)$

s

$\arg\max_{y \in \mathcal{Y}} s(x, y)$

$\arg\max_{m \in \{1, \ldots, M\}^{N}} s(x, x_{[m_1, \ldots, m_N]})$

$\arg\max_{m \in \{1, \ldots, M\}^{N},\ m_{i-1} < m_i} s(x, x_{[m_1, \ldots, m_N]})$

$x_{[i,j,k]}$

$s(x, y) \approx \sum_{i=0}^{N-1} g(y_{i+1}, x, y_c)$

$\log p(y \mid x; \theta) \approx \sum_{i=0}^{N-1} \log p(y_{i+1} \mid x, y_c; \theta)$

$s(x, y) = \log p(y \mid x; \theta)$

$C$, $y_c$

$C$

$s(x, y) \approx \sum_{i=0}^{N-1} g(y_{i+1}, x, y_c)$

$\log p(y \mid x; \theta) \approx \sum_{i=0}^{N-1} \log p(y_{i+1} \mid x, y_c; \theta)$

$s(x, y) = \log p(y \mid x; \theta)$

C

yc

$s(x, y) \approx \sum_{i=0}^{N-1} g(y_{i+1}, x, y_c)$

$\log p(y \mid x; \theta) \approx \sum_{i=0}^{N-1} \log p(y_{i+1} \mid x, y_c; \theta)$

$s(x, y) = \log p(y \mid x; \theta)$

$C$, $y_c$

x

y y

$\arg\max_{y \in \mathcal{Y}} s(x, y)$

$\arg\max_{y \in \mathcal{Y}} s(x, y)$

$\arg\max_{m \in \{1, \ldots, M\}^{N}} s(x, x_{[m_1, \ldots, m_N]})$

$\arg\max_{m \in \{1, \ldots, M\}^{N},\ m_{i-1} < m_i} s(x, x_{[m_1, \ldots, m_N]})$

$s(x, y) = \log p(y \mid x; \theta) \approx \sum_{i=0}^{N-1} \log p(y_{i+1} \mid x, y_c; \theta)$

$\log p(y_{i+1} \mid x, y_c; \theta)$

$p(y_{i+1} \mid y_c, x; \theta) \propto \exp\big(V h + W\,\mathrm{enc}(x, y_c)\big)$

$\tilde{y}_c = [E y_{i-C+1}, \ldots, E y_i]$

$h = \tanh(U \tilde{y}_c)$

$\theta = (E, U, V, W)$

$p(y_{i+1} \mid y_c, x; \theta) \propto \exp\big(V h + W\,\mathrm{enc}(x, y_c)\big)$

$\tilde{y}_c = [E y_{i-C+1}, \ldots, E y_i]$

$h = \tanh(U \tilde{y}_c)$

$\theta = (E, U, V, W)$

E =Eyi

$p(y_{i+1} \mid y_c, x; \theta) \propto \exp\big(V h + W\,\mathrm{enc}(x, y_c)\big)$

$\tilde{y}_c = [E y_{i-C+1}, \ldots, E y_i]$

$h = \tanh(U \tilde{y}_c)$

$\theta = (E, U, V, W)$

ycU h

tanhCV

H H

$p(y_{i+1} \mid y_c, x; \theta) \propto \exp\big(V h + W\,\mathrm{enc}(x, y_c)\big)$

$\tilde{y}_c = [E y_{i-C+1}, \ldots, E y_i]$

$h = \tanh(U \tilde{y}_c)$

$\theta = (E, U, V, W)$

H H

V

Vh +

+ V V

$W\,\mathrm{enc}(x, y_c)$

$p(y_{i+1} \mid y_c, x; \theta) \propto \exp\big(V h + W\,\mathrm{enc}(x, y_c)\big)$

$\tilde{y}_c = [E y_{i-C+1}, \ldots, E y_i]$

$h = \tanh(U \tilde{y}_c)$

$\theta = (E, U, V, W)$

$\mathrm{enc}_1(x, y_c) = p^{\top} \tilde{x}$

$p = [1/M, \ldots, 1/M]$

$\tilde{x} = [F x_1, \ldots, F x_M]$

$\forall i,\ l \in \{1, \ldots, L\},\quad \tilde{x}^{l}_{i} = \tanh\big(\max\{\bar{x}^{l}_{2i-1},\ \bar{x}^{l}_{2i}\}\big)$

$\tilde{x}^{0} = [F x_1, \ldots, F x_M]$

$\forall j,\quad \mathrm{enc}_2(x, y_c)_j = \max_{i} \tilde{x}^{L}_{i,j}$

$\forall i,\ l \in \{1, \ldots, L\},\quad \bar{x}^{l}_{i} = Q^{l}\, \tilde{x}^{l-1}_{[i-Q, \ldots, i+Q]}$

$\mathrm{enc}_3(x, y_c) = p^{\top} \bar{x}$

$p \propto \exp(\tilde{x} P \tilde{y}'_c)$

$\tilde{y}'_c = [G y_{i-C+1}, \ldots, G y_i]$

$\tilde{x} = [F x_1, \ldots, F x_M]$

$\forall i,\quad \bar{x}_i = \sum_{q=i-Q}^{i+Q} \tilde{x}_q / Q$

$(x^{(1)}, y^{(1)}), \ldots, (x^{(J)}, y^{(J)})$

J

$y^{*} = \arg\max_{y \in \mathcal{Y}} \sum_{i=0}^{N-1} g(y_{i+1}, x, y_c)$

$y^{*}$

$s(y, x) = \sum_{i=0}^{N-1} \alpha^{\top} f(y_{i+1}, x, y_c)$

$f(y_{i+1}, x, y_c)$

$\alpha = \langle 1, 0, \ldots, 0 \rangle$