INTRODUCTION
A little about me
- Embedded to Orchestration
- Red Hat emerging technologies
- OpenStack Sahara
- Oshinko project for OpenShift
#ApacheBigDataEU2016 2
OVERVIEW
Building Application Pipelines
Case Study: Ophicleide
Demonstration
Lessons Learned
Next Steps
#ApacheBigDataEU2016 3
INSPIRATION
Larger themes
- Developer empowerment
- Improved collaboration
- Operational freedom
#ApacheBigDataEU2016 4
CLOUD APPLICATIONS
What are we talking about?
- Multiple disparate components
- Require deployment flexibility
- Challenging to debug
#ApacheBigDataEU2016
Spark
HTTP
Node.js MongoDB
Python
MySQL
Ruby Kafka
HDFS
ActiveMQ
PostgreSQL
5
PLANNING
Before you begin engineering
- Identify moving pieces
- Storyboard the data flow
- Visualize success and failure
#ApacheBigDataEU2016
Spark HTTP Node.js MongoDB Python
6
PLANNING
Insightful analytics
- What dataset?
- How to process?
- Where are the results?
#ApacheBigDataEU2016
Spark HTTP Node.js MongoDB Python
Ingest Process Publish
7
BUILDING
Decompose application components
- Natural break points
- Build for modularity
- Stateless versus stateful
#ApacheBigDataEU2016
Spark HTTP Node.js MongoDB Python
8
BUILDING
Focus on the communication
- Coordinate in the middle
- Network resiliency
- Kubernetes DNS
#ApacheBigDataEU2016 9
COLLABORATING
Building as a team
- The right tools
- Modular projects
- Iterative improvements
- Coordinating actions
#ApacheBigDataEU2016
Spark HTTP Node.js MongoDB Python
10
CASE STUDY: OPHICLEIDE
What does it do?
- Word2Vec models
- HTTP available data
- Similarity queries
#ApacheBigDataEU2016
Spark Browser Node.js
MongoDB
Python
Kubernetes
Spark
Spark
Spark
Spark
Text Data
Text Data
Text Data
12
CASE STUDY: OPHICLEIDE
Building blocks
- Apache Spark
- Word2Vec
- Kubernetes
- OpenShift
- Node.js
- Flask
- MongoDB
- OpenAPI
#ApacheBigDataEU2016 13
OPENAPI
#ApacheBigDataEU2016
paths:/:get:description:|-Returnsinformationabouttheserverversionresponses:"200":description:|-Validserverinforesponseschema:
# Start the REST server: connexion builds the application from the OpenAPI
# (swagger) specification found under ./swagger/.
app = connexion.App(__name__, specification_dir='./swagger/')
app.add_api('swagger.yaml',
            arguments={'title': 'The REST API for the Ophicleide '
                                'Word2Vec server'})
# Listen on port 8080.
app.run(port=8080)
15
DEEP DIVE
Configuration Data
- What is needed?
- How to deliver?
#ApacheBigDataEU2016
Node.js
Python
Kubernetes
REST_ADDR=127.0.0.1
REST_PORT=8080
MONGO=mongodb://admin:admin@mongodb
16
CONFIGURATION DATA
#ApacheBigDataEU2016
# Pod spec excerpt: configuration is delivered to the web container as
# environment variables. The ${...} placeholders are presumably substituted
# when the template is processed -- confirm against the full template.
spec:
  containers:
  - name: ${WEBNAME}
    image: ${WEBIMAGE}
    env:
    - name: OPH_TRAINING_ADDR
      value: ${OPH_ADDR}
    - name: OPH_TRAINING_PORT
      value: ${OPH_PORT}
    - name: OPH_WEB_PORT
      # Quoted so the value stays a string.
      value: "8081"
    ports:
    - containerPort: 8081
      protocol: TCP
17
CONFIGURATION DATA
#ApacheBigDataEU2016
// Endpoints are read from environment variables; the fallbacks are used
// when a variable is unset or empty.
var training_addr = process.env.OPH_TRAINING_ADDR || '127.0.0.1';
var training_port = process.env.OPH_TRAINING_PORT || '8080';
var web_port = process.env.OPH_WEB_PORT || 8080;

// Forward the request to the training service's /models endpoint and stream
// its response straight back to the caller.
app.get("/api/models", function(req, res) {
  var url = `http://${training_addr}:${training_port}/models`;
  request.get(url).pipe(res);
});

// Same pass-through for the /queries endpoint.
app.get("/api/queries", function(req, res) {
  var url = `http://${training_addr}:${training_port}/queries`;
  request.get(url).pipe(res);
});

// Listen on web_port (the variable defined above); the previous identifier
// `ophicleide_web_port` was never declared and would throw a ReferenceError.
app.listen(web_port, function() {
  console.log(`ophicleide-web listening on ${web_port}`);
});
18
SECRETS
Not used in Ophicleide, but worth mentioning
#ApacheBigDataEU2016
# Pod spec excerpt: the secret "mongo-secret" is declared as a volume and
# mounted read-only into the container at /etc/mongo-secret.
volumes:
- name: mongo-secret-volume
  secret:
    secretName: mongo-secret
containers:
- name: shiny-squirrel
  image: elmiko/shiny_squirrel
  args: ["mongodb"]
  volumeMounts:
  # Must match the volume name declared above.
  - name: mongo-secret-volume
    mountPath: /etc/mongo-secret
    readOnly: true
19
SECRETS
Each secret exposed as a file in the container
#ApacheBigDataEU2016
# Read the MongoDB credentials from the files under /etc/mongo-secret.
MONGO_USER=$(cat /etc/mongo-secret/username)
MONGO_PASS=$(cat /etc/mongo-secret/password)

# Launch the application with the assembled connection URL. The URL is quoted
# so that credentials containing shell metacharacters or whitespace are passed
# as a single argument.
# NOTE(review): MONGO_HOST_PORT is not set in this excerpt -- presumably
# provided by the container environment; confirm.
/usr/bin/python /opt/shiny_squirrel/shiny_squirrel.py \
    --mongo \
    "mongodb://${MONGO_USER}:${MONGO_PASS}@${MONGO_HOST_PORT}"
20
SPARK PROCESSING
#ApacheBigDataEU2016
defworkloop(master,inq,outq,dburl):sconf=SparkConf().setAppName("ophicleide-worker").setMaster(master)sc=SparkContext(conf=sconf)
ifdburlisnotNone:db=pymongo.MongoClient(dburl).ophicleide
outq.put("ready")
whileTrue:job=inq.get()urls=job["urls"]mid=job["_id"]model=train(sc,urls)
items=model.getVectors().items()words,vecs=zip(*[(w,list(v))forw,vinitems])
22
SPARK PROCESSING
#ApacheBigDataEU2016
def train(sc, urls):
    """Fit a Word2Vec model on the combined text fetched from ``urls``.

    :param sc: context object handed through to ``url2rdd`` for each URL.
    :param urls: iterable of URLs; one RDD is built per URL.
    :return: whatever ``Word2Vec().fit`` returns for the unioned RDD.
    """
    w2v = Word2Vec()
    # Build one RDD per URL, then fold them pairwise into a single RDD.
    rdds = reduce(lambda a, b: a.union(b),
                  [url2rdd(sc, url) for url in urls])
    return w2v.fit(rdds)
def url2rdd(sc, url):
    """Fetch the document at ``url`` and return it as an RDD of word lists.

    The body is split into chunks on blank lines (``\\r\\n\\r\\n``); each chunk
    is passed through ``cleanstr`` and then split on single spaces.

    :param sc: object providing ``parallelize`` (a SparkContext in practice).
    :param url: location handed to ``urlopen``.
    :return: result of mapping the clean-and-split step over the chunks.
    """
    # Close the response as soon as the body has been read.
    with urlopen(url) as response:
        corpus_bytes = response.read()
    # NOTE(review): presumably corpus_bytes is a bytes object, so str() yields
    # its "b'...'" repr with escaped newlines, which the replaces below undo.
    # Decoding would be cleaner but would change the tokens -- confirm first.
    text = str(corpus_bytes).replace("\\r", "\r").replace("\\n", "\n")
    paragraphs = sc.parallelize(text.split("\r\n\r\n"))
    # The earlier un-assigned rdd.map(...) call was dropped: its result was
    # discarded, so it had no effect. The separator is a single space; an
    # empty separator makes str.split raise ValueError.
    return paragraphs.map(lambda l: cleanstr(l).split(" "))
23
SPARK PROCESSING
#ApacheBigDataEU2016
defcreate_query(newQuery)->str:mid=newQuery["model"]word=newQuery["word"]model=model_cache_find(mid)ifmodelisNone:msg=(("notrainedmodelwithID%ravailable;"%mid)+"check/modelstoseewhenoneisready")returnjson_error("NotFound",404,msg)else:#XXXw2v=model["w2v"]qid=uuid4()try:syns=w2v.findSynonyms(word,5)q={"_id":qid,"word":word,"results":syns,"modelName":model["name"],"model":mid}(query_collection()).insert_one(q)
24
DEMONSTRATION
see a demo at
#ApacheBigDataEU2016
https://vimeo.com/189710503
25
LESSONS LEARNED
Things that went smoothly
- OpenAPI
- Dockerfiles
- Kubernetes templates
#ApacheBigDataEU2016 26
LESSONS LEARNED
Things that require greater coordination
- API coordination
- Compute resources
- Persistent storage
- Spark configurations
#ApacheBigDataEU2016 27
LESSONS LEARNED
Compute resources
- CPU and memory constraints
- Label selectors
#ApacheBigDataEU2016
Kubelet
Node
Pod Pod
Pod
Pod
Pod
Kubelet
Node
Pod Pod
Pod
Kubelet
Node
Pod
28
NEXT STEPS
Where to take this project?
- More Spark!
- Separate query service
- Development versus production
#ApacheBigDataEU2016 29
PROJECT LINKS
Ophicleide
ApacheSpark
Kubernetes
OpenShift
#ApacheBigDataEU2016
https://github.com/ophicleide
https://spark.apache.org
https://kubernetes.io
https://openshift.org
30