Date post: | 15-Apr-2017 |
Category: |
Real Estate |
Upload: | brian-moran |
View: | 221 times |
Download: | 3 times |
Marvin & Me
MACHINE LEARNING
adactushousing.co.uk
IN HOUSING
http://linkd.in/1IxgqDb
Brian Moran
Adapted and updated from work by Michell Zappa http://www.michellzappa.com
“The field of study interested in the development of computer algorithms for transforming data into intelligent action”
Machine Learning with R, Lantz, 2013
What on earth could this possibly have to do with housing?
Marvin & You
Check email
Naive Bayes
Internet search
PageRank
Watch Netflix
Boltzmann machine
Buy lunch
Artificial neural net
Use Sat-Nav
Dijkstra’s algorithm
Apply for a loan
Decision trees
Shop on Amazon
Matrix factorization
Get a letter
k-means clustering
Diagrams (but not text) from: http://machinelearningmastery.com/a-tour-of-machine-learning-algorithms/
Marvin & You
Machine Learning is a pervasive feature in modern life
Marvin & You
New or better services
Increased income
Reduced costs
The (hard) problem K-means solves
Anatomy of an Algorithm
The (hard) problem K-means solves
Find the positions of 'K' number of clusters that minimize the distance from the data points to the clusters.
Anatomy of an Algorithm
Example taken from 'Data Smart', John W Foreman
Anatomy of an Algorithm
Example taken from 'Data Smart', John W Foreman
y
x
Anatomy of an Algorithm
Example taken from 'Data Smart', John W Foreman
‘K’ = 3
Initialize starting positions
y
x
Anatomy of an Algorithm
Example taken from 'Data Smart', John W Foreman
‘K’ = 3
Initialize starting positions
Iterate positions and measure…
y
x
Anatomy of an Algorithm
Example taken from 'Data Smart', John W Foreman
‘K’ = 3
Initialize starting positions
Iterate positions and measure…
… until optimal solution is found
y
x
Anatomy of an Algorithm
K-means pseudo code
Anatomy of an Algorithm
K-means code in R
function (x, centers, iter.max = 10L, nstart = 1L, algorithm = c("Hartigan-Wong",
    "Lloyd", "Forgy", "MacQueen"), trace = FALSE)
{
    .Mimax <- .Machine$integer.max
    do_one <- function(nmeth) {
        switch(nmeth, {
            isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
            iTran <- c(isteps.Qtran, integer(max(0, k - 1)))
            Z <- .Fortran(C_kmns, x, m, p, centers = centers,
                as.integer(k), c1 = integer(m), c2 = integer(m),
                nc = integer(k), double(k), double(k), ncp = integer(k),
                D = double(m), iTran = iTran, live = integer(k),
                iter = iter.max, wss = double(k), ifault = as.integer(trace))
            switch(Z$ifault, stop("empty cluster: try a better set of initial centers",
                call. = FALSE), Z$iter <- max(Z$iter, iter.max +
                1L), stop("number of cluster centres must lie between 1 and nrow(x)",
                call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)",
                isteps.Qtran), call. = FALSE))
        }, {
            Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers,
                k, c1 = integer(m), iter = iter.max, nc = integer(k),
                wss = double(k))
        }, {
            Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers),
                k, c1 = integer(m), iter = iter.max, nc = integer(k),
                wss = double(k))
        })
        if (m23 <- any(nmeth == c(2L, 3L))) {
            if (any(Z$nc == 0))
                warning("empty cluster: try a better set of initial centers",
                  call. = FALSE)
        }
        if (Z$iter > iter.max) {
            warning(sprintf(ngettext(iter.max, "did not converge in %d iteration",
                "did not converge in %d iterations"), iter.max),
                call. = FALSE, domain = NA)
            if (m23)
                Z$ifault <- 2L
        }
        if (nmeth %in% c(2L, 3L)) {
            if (any(Z$nc == 0))
                warning("empty cluster: try a better set of initial centers",
                  call. = FALSE)
        }
        Z
    }
    x <- as.matrix(x)
    m <- as.integer(nrow(x))
    if (is.na(m))
        stop("invalid nrow(x)")
    p <- as.integer(ncol(x))
    if (is.na(p))
        stop("invalid ncol(x)")
    if (missing(centers))
        stop("'centers' must be a number or a matrix")
    nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1L,
        Lloyd = 2L, Forgy = 2L, MacQueen = 3L)
    storage.mode(x) <- "double"
    if (length(centers) == 1L) {
        if (centers == 1)
            nmeth <- 3L
        k <- centers
        if (nstart == 1L)
            centers <- x[sample.int(m, k), , drop = FALSE]
        if (nstart >= 2L || any(duplicated(centers))) {
            cn <- unique(x)
            mm <- nrow(cn)
            if (mm < k)
                stop("more cluster centers than distinct data points.")
            centers <- cn[sample.int(mm, k), , drop = FALSE]
        }
    }
    else {
        centers <- as.matrix(centers)
        if (any(duplicated(centers)))
            stop("initial centers are not distinct")
        cn <- NULL
        k <- nrow(centers)
        if (m < k)
            stop("more cluster centers than data points")
    }
    k <- as.integer(k)
    if (is.na(k))
        stop("'invalid value of 'k'")
    iter.max <- as.integer(iter.max)
    if (is.na(iter.max) || iter.max < 1L)
        stop("'iter.max' must be positive")
    if (ncol(x) != ncol(centers))
        stop("must have same number of columns in 'x' and 'centers'")
    storage.mode(centers) <- "double"
    Z <- do_one(nmeth)
    best <- sum(Z$wss)
    if (nstart >= 2L && !is.null(cn))
        for (i in 2:nstart) {
            centers <- cn[sample.int(mm, k), , drop = FALSE]
            ZZ <- do_one(nmeth)
            if ((z <- sum(ZZ$wss)) < best) {
                Z <- ZZ
                best <- z
            }
        }
    centers <- matrix(Z$centers, k)
    dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
    cluster <- Z$c1
    if (!is.null(rn <- rownames(x)))
        names(cluster) <- rn
    totss <- sum(scale(x, scale = FALSE)^2)
    structure(list(cluster = cluster, centers = centers, totss = totss,
        withinss = Z$wss, tot.withinss = best, betweenss = totss -
            best, size = Z$nc, iter = Z$iter, ifault = Z$ifault),
        class = "kmeans")
}
<bytecode: 0x7f9a23016278>
<environment: namespace:stats>
Anatomy of an Algorithm
Marvin & Housing?
Clustering Forecasting Pattern discovery Classification
Descriptive Predictive
When A or B happens, C tends to
follow…
These cases are a similar type…
Number X is going to change to Z
This new case looks like it’s of this type…
Spot warning signs of failing tenancies?
Targeted information and services?
Better planning of maintenance spend?
Automatically handle web enquiries?
Pattern discovery
When A or B happens, C tends to
follow…
Pattern Discovery: Rent Arrears
Pattern discovery
When A or B happens, C tends to
follow…
RENT ARREARS RISK
Pattern Discovery: Rent Arrears
RENT ARREARS RISK
Transactional data
OneR algorithm Missed gas appointments
Pattern Discovery: Rent Arrears
1R code demo in R
Results: 1R Missed Gas Appointments —> Rent arrears
Clustering: Targeting Services
Clustering
These cases are a similar type…
Clustering
These cases are a similar type…
THEY MIGHT WANT FINANCIAL ADVICE
Clustering: Targeting Services
THEY MIGHT WANT FINANCIAL ADVICE
Assign to cluster
k-means clustering
Clustering: Targeting Services
15% of tenants fall into clusters that are 2x at risk of arrears
K-means code demo in R
Results: K-means Clusters —> Rent arrears
Forecasting: Refining budgeting
Number X is going to change to Z
Forecasting
Forecasting: Refining budgeting
Number X is going to change to Z
Forecasting
£s NEEDED TO REPLACE IN NEXT YEAR
Forecasting: Refining budgeting
£s NEEDED TO REPLACE IN NEXT YEAR
Boiler data
Artificial neural net
Still working on gathering the data
on this one…
Classification: Repairs
Classification
This new case looks like it’s of this type…
Classification
This new case looks like it’s of this type…
TRADE
Classification: Repairs
REPAIR REQUEST
TRADE
Classification: Repairs
REPAIR REQUEST
Check web form
Naive Bayes
“Our bathroom is leaking through the ceiling onto the stairs and the ceiling is wet through along with the walls where the taps are mounted. And puddle on the stairs”
PLUMBER
Classification: Repairs
Check web form
Naive Bayes
Check web form
Naive Bayes
bathroom leaking
ceiling stairs ceiling
wet
walls taps
stairs
>70% accuracy
PLUMBER
Classification: Repairs
Naive Bayes code demo in R
Results: Naive Bayes Text —> Repair trade
Next steps…
Surveillance and ‘coveillance’
Customisation through segmentation
Automation and ‘friction-free’ services
Data-led decisions
New Opportunities
Medium-term Plans
One click repair adactus
This Year's Focus
Three Options
1. Repair request
Delay
Staff check
Staff input
Repair ordered
2. Repair request
Repair ordered
3. Repair request
Machine Learning
Staff check
Staff input
Repair ordered
Repair ordered
Problem? N
Y
-£300,000
-£225,000
-£150,000
-£75,000
£0
£75,000
£150,000
£225,000
£300,000
10% channel shift 20% channel shift 30% channel shift 40% channel shift 50% channel shift 60% channel shift 70% channel shift
Avoiding Pyrrhic Victories
Net cost / benefit of repairs self-service
90% accuracy
10% accuracy
50% accuracy
Marvin's Future Impact
New or better services
Increased income
Reduced costs
Marvin's Future Impact
Standard models for the sector?
Some disclaimers, caveats, warnings and a bit of existential worry
Business Understanding
Data Understanding
Data Preparation
Modelling
Evaluation
Deployment
The CRISP data mining process
Text Book Data Science
Business Understanding
Data Understanding
Data Preparation
Modelling
Evaluation
Deployment
The Truth of the Matter
Count to ten
Garbage In, Garbage Out
Data Scientists are Unicorns
Adapted from http://www.anlytcs.com/2014/01/data-science-venn-diagram-v20.html
Data Science
Mathematics and Statistics
Research Methods
UNICORN
Machine Learning
Subject Matter Expertise
Programming Skills
Computer Science
Argumentum ad Verecundiam
Argumentum ad Verecundiam
Classification by Microsoft's projectoxford.ai
Models are Not 100% Correct
Tesla press release December 2015
"One can see this with the annual machine vision competitions, where the computer will
properly identify something as a dog more than 99% of the time, but might occasionally call it a potted plant. Making such mistakes at 70 mph
would be highly problematic."
Elon Musk
What would Dr. Malcolm think?
“Your scientists were so preoccupied with whether or not they could, they didn't stop to think if they should.”
Humans Need Not Apply?
£0m £10m £20m £30m £40m
See ‘Humans need not Apply’ for a pessimistic assessment: https://youtu.be/7Pq-S557XQU See Jerry Kaplan for a more optimistic view: https://youtu.be/JiiP5ROnzw8
Adactus Housing Group Operating Costs 2014/15
Employees Other things
Marvin & Me
MACHINE LEARNING
adactushousing.co.uk
IN HOUSING
http://linkd.in/1IxgqDb
Brian Moran
Music: Emily Howell (a computer programme), ‘From Darkness, Light’: II. Fugue