
adding an intermediate example

pull/2528/head
Eric Schles 7 years ago
parent commit 35d9a23118
  1. examples/intermediate_example/app/__init__.py (+19)
  2. examples/intermediate_example/app/commands.py (+9)
  3. examples/intermediate_example/app/models.py (+164)
  4. examples/intermediate_example/app/static/js/main-2.js (+48)
  5. examples/intermediate_example/app/static/js/main.js (+46)
  6. examples/intermediate_example/app/templates/index.html (+15)
  7. examples/intermediate_example/app/views.py (+70)
  8. examples/intermediate_example/manage.py (+3)
  9. examples/intermediate_example/requirements.txt (+30)
  10. examples/intermediate_example/run_server.py (+3)
  11. examples/intermediate_example/start_db.sh (+1)

examples/intermediate_example/app/__init__.py (+19)

@@ -0,0 +1,19 @@
from flask import Flask
from flask_script import Manager
#from flask.ext.sqlalchemy import SQLAlchemy
#from flask.ext.migrate import Migrate, MigrateCommand
from .commands import REPL
import os
username, password = "eric_s", "1234"
app = Flask(__name__)
#app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv("DATABASE_URL")
#app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://"+username+":"+password+"@localhost/backpage_ads"
#db = SQLAlchemy(app)
#migrate = Migrate(app,db)
#manager = Manager(app)
#manager.add_command('db', MigrateCommand)
#manager.add_command("shell",REPL())
from app import views #,models
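Note: the database, migration, and shell-manager wiring above is commented out in this commit. A minimal sketch of what the enabled version would look like (assuming a local Postgres database named backpage_ads and the Flask-SQLAlchemy / Flask-Migrate / Flask-Script packages from requirements.txt; the credentials and database name simply reuse the placeholders above):

from flask import Flask
from flask_script import Manager
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate, MigrateCommand
from .commands import REPL

username, password = "eric_s", "1234"
app = Flask(__name__)
# connection string for a local Postgres database named backpage_ads (placeholder values)
app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://" + username + ":" + password + "@localhost/backpage_ads"
db = SQLAlchemy(app)
migrate = Migrate(app, db)
manager = Manager(app)
manager.add_command('db', MigrateCommand)
manager.add_command("shell", REPL())

from app import views, models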

examples/intermediate_example/app/commands.py (+9)

@@ -0,0 +1,9 @@
from flask_script import Command
import code


class REPL(Command):
    "runs the shell"

    def run(self):
        code.interact(local=locals())
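With the Manager wiring from app/__init__.py uncommented and exposed through manage.py, this command would be started roughly as `python manage.py shell`, dropping into a plain code.interact() session with whatever is in run()'s local scope.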

examples/intermediate_example/app/models.py (+164)

@@ -0,0 +1,164 @@
"""
Here the models for our database is defined.
I am using Postgres, Flask-SQLAlchemy for this application.
For an introduction to Flask-SQLAlchemy check out: http://flask-sqlalchemy.pocoo.org/2.1/
"""
from app import db
class ImageToText(db.Model):
"""
This model stores the lookup for an image to text from Keras Models defined in image_processing.py
parameters:
@file_name - the filename being processed
@labels - the set of labels associated with the filename
@state - the state or province the ad appeared in
@city - the city or town the ad appeared in
@location - the location parsed from the ad
@url - the url of the ad
@timestamp - the timestamp of when the ad was scraped
@phone_number - the phone number associated with the ad
@latitude - latitude parsed from the ad
@longitude - longitude parsed from the ad
@image_url - image_url used for image lookup
"""
__tablename__ = 'image_to_text'
id = db.Column(db.Integer, primary_key=True)
filename = db.Column(db.String)
labels = db.Column(db.String)
state = db.Column(db.String)
city = db.Column(db.String)
location = db.Column(db.String)
url = db.Column(db.String)
timestamp = db.Column(db.DateTime)
phone_number = db.Column(db.String)
latitude = db.Column(db.String)
longitude = db.Column(db.String)
image_url = db.Column(db.String)
throw_away = db.Column(db.String)
def __init__(
self, image_url, filename, labels, state, city,
location, url, timestamp, phone_number,
latitude, longitude,throw_away
):
self.image_url = image_url
self.filename = filename
self.labels = labels
self.state = state
self.city = city
self.location = location
self.url = url
self.timestamp = timestamp
self.phone_number = phone_number
self.latitude = latitude
self.longitude = longitude
self.throw_away = throw_away
class AreaCodeLookup(db.Model):
    """
    This model provides a lookup for phone number area codes and aids in converting them to latitude, longitude.
    Specifically, the mapping provides each area code and its corresponding township.
    From there geopy provides the lookup to latitude, longitude.
    Because a location may not be unique - there could be multiple towns with the same name -
    there is no 100% guarantee that all lookups will be accurate.

    Source: https://www.allareacodes.com/

    parameters:
    @area_code - the area code from a phone number
    @city - a string city
    @state - a string state
    @latitude - latitude for the area code
    @longitude - longitude for the area code
    """
    __tablename__ = "areacode_lookup"
    id = db.Column(db.Integer, primary_key=True)
    area_code = db.Column(db.String)
    city = db.Column(db.String)
    state = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)

    def __init__(self, area_code, city, state, latitude, longitude):
        self.area_code = area_code
        self.city = city
        self.state = state
        self.latitude = latitude
        self.longitude = longitude
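The geopy lookup mentioned in the docstring above is not part of this commit; a minimal sketch of what it could look like (the Nominatim geocoder and the user_agent string are illustrative assumptions, not the author's code):

from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="intermediate_example")
location = geolocator.geocode("Brooklyn, NY")
if location is not None:
    # latitude/longitude strings to store on an AreaCodeLookup row
    latitude, longitude = str(location.latitude), str(location.longitude)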
class BackpageAdInfo(db.Model):
    """
    This model gives us a set of specific information from each ad scraped from Backpage.

    parameters:
    @ad_title - used primarily to uniquely identify backpage ads, since titles are unique
    @phone_number - the phone number used in the ad; can be empty. The number is stored as a string
    since it should be thought of as immutable.
    @city - the city the ad is from
    @state - the state the ad is from
    @location - the location mentioned in the advertisement
    @latitude - latitude derived from the location mentioned in the advertisement
    @longitude - longitude derived from the location mentioned in the advertisement
    @ad_body - the long form text in the ad
    @photos - a filepath link to the set of pictures downloaded for the ad
    @post_id - an id for each backpage post from backpage
    @timestamp - when the ad was scraped
    @url - the url of the scraped ad
    """
    __tablename__ = 'ad_info'
    id = db.Column(db.Integer, primary_key=True)
    ad_title = db.Column(db.String)
    phone_number = db.Column(db.String)
    location = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)
    ad_body = db.Column(db.String)
    photos = db.Column(db.String)
    post_id = db.Column(db.String)
    timestamp = db.Column(db.DateTime)
    city = db.Column(db.String)
    state = db.Column(db.String)
    url = db.Column(db.String)

    def __init__(self, url, ad_title, phone_number, ad_body, location, latitude, longitude,
                 photos, post_id, timestamp, city, state):
        self.url = url
        self.ad_title = ad_title
        self.phone_number = phone_number
        self.location = location
        self.latitude = latitude
        self.longitude = longitude
        self.ad_body = ad_body
        self.photos = photos
        self.post_id = post_id
        self.timestamp = timestamp
        self.city = city
        self.state = state
class Backpage(db.Model):
    """
    This model gives us high level information about Backpage, the website.
    It is used to determine some metrics found in lectures/scraping_the_web.md.

    parameters:
    @timestamp - the time at which the content was scraped. It is assumed the scrapers run all the time,
    so the scrape time should be accurate to within an hour of scraping; this is used in some of the metrics
    for analysis.
    @frequency - the number of ads scraped at @timestamp; used in many of the metrics for the scraper.
    """
    __tablename__ = 'backpage'
    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime)
    frequency = db.Column(db.Integer)

    def __init__(self, timestamp, frequency):
        self.timestamp = timestamp
        self.frequency = frequency
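Once the SQLAlchemy wiring in app/__init__.py is enabled, a minimal sketch of creating these tables and writing a row looks like this (illustrative values; assumes a running Postgres instance and the Flask-SQLAlchemy 2.x behavior of binding db to the app at construction time):

from datetime import datetime
from app import db
from app.models import Backpage

db.create_all()  # create the tables defined above

snapshot = Backpage(timestamp=datetime.now(), frequency=42)
db.session.add(snapshot)
db.session.commit()
print(Backpage.query.count())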

examples/intermediate_example/app/static/js/main-2.js (+48)

@@ -0,0 +1,48 @@
(() => {
  'use strict';

  function buildGraph(response) {
    const cop = response.cop;
    const cops = response.cops;
    const timestamp = response.timestamp;
    var chart = c3.generate({
      data: {
        x: 'x',
        columns: [
          timestamp,
          cop,
          cops
        ]
      },
      axis: {
        x: {
          type: 'timeseries',
          tick: {
            format: '%Y-%m-%d'
          }
        }
      }
    });
  }

  function getArticles() {
    // Returns a promise that resolves with the JSON payload from /api.
    // NOTE: the payload still needs to be parsed into c3 column arrays
    // (['cop', ...], etc.) before buildGraph can plot it.
    return axios.get('http://localhost:5000/api')
      .then(function (response) {
        console.log(response);
        return response.data;
      })
      .catch(function (error) {
        console.log(error);
      });
  }

  // wait for the request to finish before drawing the chart
  getArticles().then(buildGraph);
})();

examples/intermediate_example/app/static/js/main.js (+46)

@@ -0,0 +1,46 @@
(() => {
  'use strict';

  function buildGraph() {
    var chart = c3.generate({
      data: {
        x: 'x',
        // xFormat: '%Y%m%d', // 'xFormat' can be used as custom format of 'x'
        columns: [
          ['x', '2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06'],
          // ['x', '20130101', '20130102', '20130103', '20130104', '20130105', '20130106'],
          ['data1', 30, 200, 100, 400, 150, 250],
          ['data2', 130, 340, 200, 500, 250, 350]
        ]
      },
      axis: {
        x: {
          type: 'timeseries',
          tick: {
            format: '%Y-%m-%d'
          }
        }
      }
    });
  }

  function getArticles() {
    const resp = axios.get('')
      .then(function (response) {
        console.log(response);
      })
      .catch(function (error) {
        console.log(error);
      });
    // parse
    // return parsed response
  }

  buildGraph(getArticles());
})();

examples/intermediate_example/app/templates/index.html (+15)

@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html>
  <head>
    <link href="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.11/c3.min.css" rel="stylesheet" type="text/css">
    <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.17/d3.min.js" charset="utf-8"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.11/c3.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/axios/0.17.1/axios.min.js"></script>
  </head>
  <body>
    <div id="chart"></div>
    <!--<script src="./main.js"></script>-->
    <script src="{{ url_for('static', filename='js/main.js') }}"></script>
  </body>
</html>

examples/intermediate_example/app/views.py (+70)

@@ -0,0 +1,70 @@
from app import app
#from app import db
from flask import render_template, request, jsonify
import json
import requests
import pandas as pd
from datetime import datetime


def xor(v1, v2):
    if v1 and v2:
        return False
    if v1 and not v2:
        return True
    if not v1 and v2:
        return True
    if not v1 and not v2:
        return False


def found(val):
    if val == -1:
        return False
    else:
        return True


def fetch_news():
    API_KEY = 'e750e0189ede4b6b8b1a766b8523b29a'
    sources = [
        'techcrunch', 'reuters', 'newsweek', 'new-york-times',
        'the-wall-street-journal', 'the-washington-post'
    ]
    list_of_words = ['cop', 'cops', 'crime', 'law enforcement', 'homicide',
                     'crime rate', 'white collar crime', 'blue collar crime']
    mention_count = {}.fromkeys(list_of_words, 0)
    # each word needs its own set; fromkeys(list_of_words, set()) would share one set across every key
    link_mentions = {word: set() for word in list_of_words}
    # gather the articles from every source; merging the raw JSON dicts would
    # overwrite one source's 'articles' list with the next
    articles = []
    for source in sources:
        resp = requests.get('https://newsapi.org/v1/articles?source=' + source + '&apiKey=' + API_KEY)
        articles.extend(resp.json().get('articles', []))
    for article in articles:
        for word in list_of_words:
            # guard against missing title/description fields
            title_found = found((article.get('title') or '').find(word))
            description_found = found((article.get('description') or '').find(word))
            if xor(title_found, description_found):
                mention_count[word] += 1
                link_mentions[word].add(article['url'])
    link_mentions = {key: list(link_mentions[key]) for key in link_mentions}
    mention_count["timestamp"] = str(datetime.now())
    link_mentions["timestamp"] = str(datetime.now())
    return mention_count, link_mentions


@app.route("/api", methods=["GET", "POST"])
def api():
    mention_count, link_mentions = fetch_news()
    return jsonify(mention_count)


@app.route("/", methods=["GET", "POST"])
def index():
    return render_template("index.html")
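A quick way to exercise the /api route once the server is up (a sketch; assumes run_server.py is serving on localhost:5000 and the NewsAPI key above is still valid):

import requests

resp = requests.get("http://localhost:5000/api")
counts = resp.json()  # mention counts per word plus a "timestamp" key
print(counts)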

examples/intermediate_example/manage.py (+3)

@@ -0,0 +1,3 @@
from app import manager
manager.run()

examples/intermediate_example/requirements.txt (+30)

@@ -0,0 +1,30 @@
boto
keras
elasticsearch
zipcode
gunicorn
geopy
usaddress
statsmodels
scipy
Flask
Flask-Cors
Flask-Migrate
Flask-Script
Flask-SQLAlchemy
Jinja2
lxml
matplotlib
nose
num2words
pandas
patsy
pbr
plotly
psycopg2
requests
SQLAlchemy
Werkzeug
twilio
geopandas
shapely

examples/intermediate_example/run_server.py (+3)

@@ -0,0 +1,3 @@
from app import app
app.run(debug=True)

examples/intermediate_example/start_db.sh (+1)

@@ -0,0 +1 @@
pg_ctl -D /usr/local/var/postgres -l /usr/local/var/postgres/server.log start
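Taken together, a typical local run of this example would look roughly like: start Postgres with `sh start_db.sh` (the data-directory path above assumes a Homebrew-style install), install the dependencies with `pip install -r requirements.txt`, then launch the dev server with `python run_server.py`.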