mirror of https://github.com/mitsuhiko/flask.git
Eric Schles
7 years ago
11 changed files with 408 additions and 0 deletions
@ -0,0 +1,19 @@ |
|||||||
|
from flask import Flask
from flask_script import Manager
#from flask.ext.sqlalchemy import SQLAlchemy
#from flask.ext.migrate import Migrate, MigrateCommand
from .commands import REPL
import os

# NOTE(security): credentials are hardcoded in source and were only used by
# the commented-out database URL below — move them to environment variables
# (e.g. os.getenv) before re-enabling the database config.
username,password = "eric_s","1234"

# Flask application object shared by the rest of the package.
app = Flask(__name__)

#app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv("DATABASE_URL")
#app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://"+username+":"+password+"@localhost/backpage_ads"
#db = SQLAlchemy(app)
#migrate = Migrate(app,db)

#manager = Manager(app)
#manager.add_command('db', MigrateCommand)
#manager.add_command("shell",REPL())

# Imported at the bottom, presumably to avoid a circular import with
# app.views (which imports `app` from this module) — TODO confirm.
from app import views #,models
@ -0,0 +1,9 @@ |
|||||||
|
from flask_script import Command |
||||||
|
import code |
||||||
|
|
||||||
|
class REPL(Command):
    """Flask-Script command that runs an interactive Python shell."""

    def run(self):
        # Drop into an interactive interpreter; locals() exposes `self`
        # to the shell session.
        code.interact(local=locals())
||||||
|
|
@ -0,0 +1,164 @@ |
|||||||
|
""" |
||||||
|
|
||||||
|
Here the models for our database is defined. |
||||||
|
|
||||||
|
I am using Postgres, Flask-SQLAlchemy for this application. |
||||||
|
|
||||||
|
For an introduction to Flask-SQLAlchemy check out: http://flask-sqlalchemy.pocoo.org/2.1/ |
||||||
|
""" |
||||||
|
from app import db |
||||||
|
|
||||||
|
class ImageToText(db.Model):
    """
    Lookup from an image to the text labels produced for it by the Keras
    models defined in image_processing.py.

    parameters:
    @filename - the filename being processed
    @labels - the set of labels associated with the filename
    @state - the state or province the ad appeared in
    @city - the city or town the ad appeared in
    @location - the location parsed from the ad
    @url - the url of the ad
    @timestamp - the timestamp of when the ad was scraped
    @phone_number - the phone number associated with the ad
    @latitude - latitude parsed from the ad
    @longitude - longitude parsed from the ad
    @image_url - image_url used for image lookup
    @throw_away - auxiliary column; meaning not evident from this file —
        TODO confirm against the code that populates it
    """

    __tablename__ = 'image_to_text'
    id = db.Column(db.Integer, primary_key=True)
    filename = db.Column(db.String)
    labels = db.Column(db.String)
    state = db.Column(db.String)
    city = db.Column(db.String)
    location = db.Column(db.String)
    url = db.Column(db.String)
    timestamp = db.Column(db.DateTime)
    # Stored as a string: phone numbers are identifiers, not quantities.
    phone_number = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)
    image_url = db.Column(db.String)
    throw_away = db.Column(db.String)

    def __init__(
        self, image_url, filename, labels, state, city,
        location, url, timestamp, phone_number,
        latitude, longitude,throw_away
    ):
        self.image_url = image_url
        self.filename = filename
        self.labels = labels
        self.state = state
        self.city = city
        self.location = location
        self.url = url
        self.timestamp = timestamp
        self.phone_number = phone_number
        self.latitude = latitude
        self.longitude = longitude
        self.throw_away = throw_away
||||||
|
|
||||||
|
|
||||||
|
class AreaCodeLookup(db.Model):
    """
    Lookup table mapping a phone-number area code to its township, which
    aids in converting the area code to latitude/longitude (geopy performs
    the final town -> coordinates lookup).

    Because town names are not unique — multiple towns can share a name —
    there is no 100% guarantee that every lookup is accurate.

    Source: https://www.allareacodes.com/

    parameters:
    @area_code - the area code from a phone number
    @city - a string city
    @state - a string state
    @latitude - latitude for the area code
    @longitude - longitude for the area code
    """
    __tablename__ = "areacode_lookup"

    id = db.Column(db.Integer, primary_key=True)
    area_code = db.Column(db.String)
    city = db.Column(db.String)
    state = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)

    def __init__(self, area_code, city, state, latitude, longitude):
        # Assign every column in one pass rather than attribute-by-attribute.
        fields = ("area_code", "city", "state", "latitude", "longitude")
        values = (area_code, city, state, latitude, longitude)
        for name, value in zip(fields, values):
            setattr(self, name, value)
||||||
|
|
||||||
|
|
||||||
|
class BackpageAdInfo(db.Model):
    """
    A set of specific information extracted from each ad scraped from
    backpage.

    parameters:
    @ad_title - used primarily to uniquely identify backpage ads - since titles are unique
    @phone_number - the phone number used in the ad, can be empty. This number is stored as a string
    since it should be thought of as immutable.
    @city - the city the ad is from
    @state - the state the ad is from
    @location - the location mentioned in the advertisement
    @latitude - latitude derived from the location mentioned in the advertisement
    @longitude - longitude derived from the location mentioned in the advertisement
    @ad_body - the long form text in the ad
    @photos - a filepath link to the set of pictures downloaded for the ad
    @post_id - an id for each backpage post from backpage
    @timestamp - when the ad was scraped
    @url - the url of the scraped ad
    """
    __tablename__ = 'ad_info'
    id = db.Column(db.Integer, primary_key=True)
    ad_title = db.Column(db.String)
    phone_number = db.Column(db.String)
    location = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)
    ad_body = db.Column(db.String)
    photos = db.Column(db.String)
    post_id = db.Column(db.String)
    timestamp = db.Column(db.DateTime)
    city = db.Column(db.String)
    state = db.Column(db.String)
    url = db.Column(db.String)

    def __init__(self,url, ad_title, phone_number, ad_body, location, latitude, longitude, photos, post_id,timestamp, city, state):
        self.url = url
        self.ad_title = ad_title
        self.phone_number = phone_number
        self.location = location
        self.latitude = latitude
        self.longitude = longitude
        self.ad_body = ad_body
        self.photos = photos
        self.post_id = post_id
        self.timestamp = timestamp
        self.city = city
        self.state = state
||||||
|
|
||||||
|
|
||||||
|
class Backpage(db.Model):
    """
    High-level information about backpage, the website; used to compute
    some of the metrics found in lectures/scraping_the_web.md.

    parameters:
    @timestamp - the time at which the content was scraped. Scrapers are
        assumed to run continuously, so the scrape time should be accurate
        to within an hour; used by several analysis metrics.
    @frequency - the number of ads scraped at @timestamp; used in many of
        the scraper metrics.
    """
    __tablename__ = 'backpage'

    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime)
    frequency = db.Column(db.Integer)

    def __init__(self, timestamp, frequency):
        self.timestamp, self.frequency = timestamp, frequency
||||||
|
|
@ -0,0 +1,48 @@ |
|||||||
|
(() => {
  'use strict';

  // Render a c3 time-series chart from the API payload.
  // Assumes the payload provides c3 column arrays named timestamp/cop/cops
  // (['x', ...dates], ['cop', ...values], ...) — TODO confirm against /api.
  function buildGraph(response) {
    const cop = response.cop;
    const cops = response.cops;
    const timestamp = response.timestamp;

    var chart = c3.generate({
      data: {
        x: 'x',
        columns: [
          timestamp,
          cop,
          cops
        ]
      },
      axis: {
        x: {
          type: 'timeseries',
          tick: {
            format: '%Y-%m-%d'
          }
        }
      }
    });
  }

  // Fetch the article metrics from the local API and return a promise for
  // the parsed JSON body.
  // Bug fix: the original returned undefined and called
  // buildGraph(getArticles()) synchronously, so buildGraph dereferenced
  // `undefined.cop` before the request ever resolved. Returning the axios
  // promise and chaining .then(buildGraph) fixes the ordering.
  function getArticles() {
    return axios.get('http://localhost:5000/api')
      .then(function (response) {
        console.log(response);
        return response.data;
      });
  }

  getArticles()
    .then(buildGraph)
    .catch(function (error) {
      console.log(error);
    });
})();
||||||
|
|
@ -0,0 +1,46 @@ |
|||||||
|
|
||||||
|
(() => {

  // Render a static demo c3 time-series chart with hard-coded sample data.
  // The argument passed at the bottom is ignored (buildGraph takes no
  // parameters), so the getArticles() coupling is harmless here.
  function buildGraph() {
    'use strict';

    var chart = c3.generate({
      data: {
        x: 'x',
        // xFormat: '%Y%m%d', // 'xFormat' can be used as custom format of 'x'
        columns: [
          ['x', '2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06'],
          // ['x', '20130101', '20130102', '20130103', '20130104', '20130105', '20130106'],
          ['data1', 30, 200, 100, 400, 150, 250],
          ['data2', 130, 340, 200, 500, 250, 350]
        ]
      },
      axis: {
        x: {
          type: 'timeseries',
          tick: {
            format: '%Y-%m-%d'
          }
        }
      }
    });
  }

  // NOTE(review): axios.get('') is a placeholder URL and the fetched data is
  // never used — presumably scaffolding for wiring in the real API later.
  function getArticles() {
    const resp = axios.get('')
      .then(function (response) {
        console.log(response);
      })
      .catch(function (error) {
        console.log(error);
      });

    // parse
    // return parsed response
  }

  buildGraph(getArticles());

})();
||||||
|
|
@ -0,0 +1,15 @@ |
|||||||
|
<!DOCTYPE html>
<html>
<head>
    <!-- Charting stack: c3 (on top of d3) for the graph, axios for HTTP. -->
    <link href="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.11/c3.min.css" rel="stylesheet" type="text/css">
    <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.17/d3.min.js" charset="utf-8"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.11/c3.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/axios/0.17.1/axios.min.js"></script>
</head>
<body>
    <!-- c3 renders the chart into this container by default ('#chart'). -->
    <div id="chart"></div>

    <!--<script src="./main.js"></script>-->
    <script src="{{url_for('static',filename='js/main.js') }}"></script>
</body>
</html>
@ -0,0 +1,70 @@ |
|||||||
|
from app import app |
||||||
|
#from app import db |
||||||
|
from flask import render_template, request, jsonify |
||||||
|
import json |
||||||
|
import requests |
||||||
|
import pandas as pd |
||||||
|
from datetime import datetime |
||||||
|
|
||||||
|
def xor(v1, v2):
    """Return True when exactly one of *v1*, *v2* is truthy (logical XOR).

    Replaces the original four-branch truth table with the equivalent
    single expression; bool() normalizes arbitrary truthy/falsy inputs.
    """
    return bool(v1) != bool(v2)
||||||
|
|
||||||
|
def found(val):
    """Return True when *val* is a hit from str.find() (i.e. not -1)."""
    # -1 is str.find's "not found" sentinel; every other index is a match.
    return val != -1
||||||
|
|
||||||
|
def fetch_news():
    """Fetch articles from several NewsAPI sources and count keyword mentions.

    Returns a tuple ``(mention_count, link_mentions)``:
      mention_count - dict mapping each tracked word to the number of
          articles whose title XOR description mentions it, plus a
          "timestamp" entry (stringified datetime.now()).
      link_mentions - dict mapping each tracked word to a list of article
          URLs that mentioned it, plus a "timestamp" entry.
    """
    # NOTE(security): API key is hardcoded in source; move it to an
    # environment variable and rotate the key.
    API_KEY = 'e750e0189ede4b6b8b1a766b8523b29a'
    sources = [
        'techcrunch', 'reuters', 'newsweek', 'new-york-times',
        'the-wall-street-journal', 'the-washington-post',
    ]

    list_of_words = ['cop', 'cops', 'crime', 'law enforcement', 'homocide',
                     'crime rate', 'white collar crime', 'blue collar crime']

    mention_count = dict.fromkeys(list_of_words, 0)
    # Bug fix: the original passed ONE shared set() to fromkeys(), so every
    # word aliased the same set and accumulated each other's URLs; build an
    # independent set per word instead.
    link_mentions = {word: set() for word in list_of_words}

    # Bug fix: the original merged the six responses with dict.update(),
    # which overwrote the 'articles' list on every call so only the last
    # source (washington-post) was ever scanned; accumulate all articles.
    articles = []
    for source in sources:
        resp = requests.get(
            'https://newsapi.org/v1/articles?source=%s&apiKey=%s'
            % (source, API_KEY))
        articles.extend(resp.json().get('articles', []))

    for article in articles:
        for word in list_of_words:
            title_found = found(article['title'].find(word))
            description_found = found(article['description'].find(word))
            # Original behavior preserved: count a mention only when exactly
            # one of title/description contains the word.
            if xor(title_found, description_found):
                mention_count[word] += 1
                link_mentions[word].add(article['url'])

    # JSON-serializable copies: sets -> lists, plus the scrape timestamp.
    link_mentions = {key: list(link_mentions[key]) for key in link_mentions}
    mention_count["timestamp"] = str(datetime.now())
    link_mentions["timestamp"] = str(datetime.now())
    return mention_count, link_mentions
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api", methods=["GET", "POST"])
def api():
    """JSON endpoint exposing the keyword mention counts."""
    # fetch_news also returns the per-word URL lists; only the counts are
    # exposed by this endpoint.
    counts, _links = fetch_news()
    return jsonify(counts)
||||||
|
|
||||||
|
|
||||||
|
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the chart page (templates/index.html)."""
    return render_template("index.html")
@ -0,0 +1,3 @@ |
|||||||
|
from app import manager

# NOTE(review): `manager = Manager(app)` is commented out in app/__init__.py,
# so this import will raise ImportError at runtime — re-enable it there
# before using this entry point.
manager.run()
@ -0,0 +1,30 @@ |
|||||||
|
boto |
||||||
|
keras |
||||||
|
elasticsearch |
||||||
|
zipcode |
||||||
|
gunicorn |
||||||
|
geopy |
||||||
|
usaddress |
||||||
|
statsmodels |
||||||
|
scipy |
||||||
|
Flask |
||||||
|
Flask-Cors |
||||||
|
Flask-Migrate |
||||||
|
Flask-Script |
||||||
|
Flask-SQLAlchemy |
||||||
|
Jinja2 |
||||||
|
lxml |
||||||
|
matplotlib |
||||||
|
nose |
||||||
|
num2words |
||||||
|
pandas |
||||||
|
patsy |
||||||
|
pbr |
||||||
|
plotly |
||||||
|
psycopg2 |
||||||
|
requests |
||||||
|
SQLAlchemy |
||||||
|
Werkzeug |
||||||
|
twilio |
||||||
|
geopandas |
||||||
|
shapely |
@ -0,0 +1,3 @@ |
|||||||
|
from app import app

# Development entry point; debug=True enables the reloader and the
# interactive (code-executing) debugger — never use in production.
app.run(debug=True)
Loading…
Reference in new issue