Eric Schles
7 years ago
11 changed files with 408 additions and 0 deletions
@@ -0,0 +1,19 @@
from flask import Flask
from flask_script import Manager
#from flask.ext.sqlalchemy import SQLAlchemy
#from flask.ext.migrate import Migrate, MigrateCommand
from .commands import REPL
import os


username, password = "eric_s", "1234"
app = Flask(__name__)
#app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv("DATABASE_URL")
#app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://" + username + ":" + password + "@localhost/backpage_ads"
#db = SQLAlchemy(app)
#migrate = Migrate(app, db)

# the manage script below imports and runs this manager, so it needs to be live
manager = Manager(app)
#manager.add_command('db', MigrateCommand)
manager.add_command("shell", REPL())

from app import views  #,models
@@ -0,0 +1,9 @@
from flask_script import Command
import code


class REPL(Command):
    """Runs an interactive Python shell."""

    def run(self):
        code.interact(local=locals())
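Note that code.interact(local=locals()) only exposes the names local to run, which is essentially just self. A sketch of one way to seed the shell with more useful objects, assuming the app object is importable from the package's __init__ (the first file in this diff; adjust to whatever it actually exports):

    def run(self):
        from app import app  # assumed import; add db here once it is enabled
        code.interact(local={'app': app})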
@@ -0,0 +1,164 @@
"""
Here the models for our database are defined.

I am using Postgres and Flask-SQLAlchemy for this application.

For an introduction to Flask-SQLAlchemy check out: http://flask-sqlalchemy.pocoo.org/2.1/
"""
from app import db


class ImageToText(db.Model):
    """
    This model stores the lookup from an image to the text labels produced by the Keras models defined in image_processing.py

    parameters:
    @filename - the filename being processed
    @labels - the set of labels associated with the filename
    @state - the state or province the ad appeared in
    @city - the city or town the ad appeared in
    @location - the location parsed from the ad
    @url - the url of the ad
    @timestamp - the timestamp of when the ad was scraped
    @phone_number - the phone number associated with the ad
    @latitude - latitude parsed from the ad
    @longitude - longitude parsed from the ad
    @image_url - image_url used for image lookup
    """

    __tablename__ = 'image_to_text'
    id = db.Column(db.Integer, primary_key=True)
    filename = db.Column(db.String)
    labels = db.Column(db.String)
    state = db.Column(db.String)
    city = db.Column(db.String)
    location = db.Column(db.String)
    url = db.Column(db.String)
    timestamp = db.Column(db.DateTime)
    phone_number = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)
    image_url = db.Column(db.String)
    throw_away = db.Column(db.String)

    def __init__(
        self, image_url, filename, labels, state, city,
        location, url, timestamp, phone_number,
        latitude, longitude, throw_away
    ):
        self.image_url = image_url
        self.filename = filename
        self.labels = labels
        self.state = state
        self.city = city
        self.location = location
        self.url = url
        self.timestamp = timestamp
        self.phone_number = phone_number
        self.latitude = latitude
        self.longitude = longitude
        self.throw_away = throw_away


class AreaCodeLookup(db.Model):
    """
    This model provides a lookup for phone number area codes and aids in converting them to latitude, longitude.
    Specifically this mapping provides:
    the area code and its corresponding township.
    From there geopy provides the lookup to latitude, longitude.

    Because a location may not be unique - there could be multiple towns with the same name -
    there is no 100% guarantee that all lookups will be accurate.

    Source: https://www.allareacodes.com/
    parameters:
    @area_code - the area code from a phone number
    @city - a string city
    @state - a string state
    @latitude - latitude for the area code
    @longitude - longitude for the area code
    """
    __tablename__ = "areacode_lookup"
    id = db.Column(db.Integer, primary_key=True)
    area_code = db.Column(db.String)
    city = db.Column(db.String)
    state = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)

    def __init__(self, area_code, city, state, latitude, longitude):
        self.area_code = area_code
        self.city = city
        self.state = state
        self.latitude = latitude
        self.longitude = longitude


class BackpageAdInfo(db.Model):
    """
    This model gives us a set of specific information from each ad scraped from backpage.

    parameters:
    @ad_title - used primarily to uniquely identify backpage ads, since titles are unique
    @phone_number - the phone number used in the ad; can be empty. This number is stored as a string
    since it should be thought of as immutable.
    @city - the city the ad is from
    @state - the state the ad is from
    @location - the location mentioned in the advertisement
    @latitude - latitude derived from the location mentioned in the advertisement
    @longitude - longitude derived from the location mentioned in the advertisement
    @ad_body - the long form text in the ad
    @photos - a filepath link to the set of pictures downloaded for the ad
    @post_id - an id for each backpage post, from backpage
    @timestamp - when the ad was scraped
    @url - the url of the scraped ad
    """
    __tablename__ = 'ad_info'
    id = db.Column(db.Integer, primary_key=True)
    ad_title = db.Column(db.String)
    phone_number = db.Column(db.String)
    location = db.Column(db.String)
    latitude = db.Column(db.String)
    longitude = db.Column(db.String)
    ad_body = db.Column(db.String)
    photos = db.Column(db.String)
    post_id = db.Column(db.String)
    timestamp = db.Column(db.DateTime)
    city = db.Column(db.String)
    state = db.Column(db.String)
    url = db.Column(db.String)

    def __init__(self, url, ad_title, phone_number, ad_body, location, latitude, longitude,
                 photos, post_id, timestamp, city, state):
        self.url = url
        self.ad_title = ad_title
        self.phone_number = phone_number
        self.location = location
        self.latitude = latitude
        self.longitude = longitude
        self.ad_body = ad_body
        self.photos = photos
        self.post_id = post_id
        self.timestamp = timestamp
        self.city = city
        self.state = state


class Backpage(db.Model):
    """
    This model gives us high level information about backpage, the website.
    It is used to determine some metrics found in lectures/scraping_the_web.md

    parameters:
    @timestamp - the time at which the content was scraped. It is assumed scrapers run all the time,
    so the scrape time should be accurate to within an hour of scraping; this is used in some of the
    metrics for analysis.
    @frequency - the number of ads scraped at @timestamp; used in many of the metrics for the scraper.
    """
    __tablename__ = 'backpage'
    id = db.Column(db.Integer, primary_key=True)
    timestamp = db.Column(db.DateTime)
    frequency = db.Column(db.Integer)

    def __init__(self, timestamp, frequency):
        self.timestamp = timestamp
        self.frequency = frequency
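A minimal usage sketch for these models, assuming the commented-out SQLAlchemy wiring in the first file of this diff is enabled so that the "from app import db" import above actually works:

    # sketch only: requires db = SQLAlchemy(app) to be uncommented in the package __init__
    from datetime import datetime
    from app import db
    from app.models import Backpage

    db.create_all()  # creates image_to_text, areacode_lookup, ad_info, backpage
    db.session.add(Backpage(datetime.now(), 120))  # one scrape snapshot: 120 ads seen
    db.session.commit()
    print(Backpage.query.count())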
@@ -0,0 +1,48 @@
(() => {
    'use strict';

    // Build a c3 time-series chart from the /api response.
    function buildGraph(response) {
        const cop = response.cop;
        const cops = response.cops;
        const timestamp = response.timestamp;

        var chart = c3.generate({
            data: {
                x: 'x',
                columns: [
                    timestamp,
                    cop,
                    cops
                ]
            },
            axis: {
                x: {
                    type: 'timeseries',
                    tick: {
                        format: '%Y-%m-%d'
                    }
                }
            }
        });
    }

    // This returns JSON: fetch the mention counts and hand the parsed
    // response to buildGraph once the request resolves.
    function getArticles() {
        return axios.get('http://localhost:5000/api')
            .then(function (response) {
                buildGraph(response.data);
            })
            .catch(function (error) {
                console.log(error);
            });
    }

    getArticles();

})();
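Note: buildGraph above expects response.cop, response.cops and response.timestamp to already be c3-style columns, whereas the /api endpoint later in this diff returns flat per-keyword counts; one side needs adapting. A hypothetical sketch of the payload shape buildGraph assumes:

    # hypothetical JSON payload buildGraph expects; not what /api returns today
    {
        "timestamp": ["x", "2018-01-01", "2018-01-02"],
        "cop":       ["cop", 3, 5],
        "cops":      ["cops", 1, 2]
    }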
@@ -0,0 +1,46 @@
(() => {
    'use strict';

    // Demo chart with hardcoded data, following the c3 timeseries example.
    function buildGraph() {
        var chart = c3.generate({
            data: {
                x: 'x',
                // xFormat: '%Y%m%d', // 'xFormat' can be used as custom format of 'x'
                columns: [
                    ['x', '2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', '2013-01-06'],
                    // ['x', '20130101', '20130102', '20130103', '20130104', '20130105', '20130106'],
                    ['data1', 30, 200, 100, 400, 150, 250],
                    ['data2', 130, 340, 200, 500, 250, 350]
                ]
            },
            axis: {
                x: {
                    type: 'timeseries',
                    tick: {
                        format: '%Y-%m-%d'
                    }
                }
            }
        });
    }

    // Placeholder fetch; the API url is still to be filled in.
    function getArticles() {
        return axios.get('')
            .then(function (response) {
                console.log(response);
                // parse
                // return parsed response
            })
            .catch(function (error) {
                console.log(error);
            });
    }

    buildGraph();

})();
@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html>
<head>
    <link href="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.11/c3.min.css" rel="stylesheet" type="text/css">
    <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.17/d3.min.js" charset="utf-8"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/c3/0.4.11/c3.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/axios/0.17.1/axios.min.js"></script>
</head>
<body>
    <div id="chart"></div>

    <!--<script src="./main.js"></script>-->
    <script src="{{ url_for('static', filename='js/main.js') }}"></script>
</body>
</html>
@@ -0,0 +1,70 @@
from app import app
#from app import db
from flask import render_template, request, jsonify
import json
import requests
import pandas as pd
from datetime import datetime


def xor(v1, v2):
    """True when exactly one of v1, v2 is truthy."""
    return bool(v1) != bool(v2)


def found(val):
    """str.find returns -1 when the substring is absent."""
    return val != -1


def fetch_news():
    API_KEY = 'e750e0189ede4b6b8b1a766b8523b29a'

    sources = [
        'techcrunch', 'reuters', 'newsweek', 'new-york-times',
        'the-wall-street-journal', 'the-washington-post'
    ]
    responses = [
        requests.get('https://newsapi.org/v1/articles?source=' + source + '&apiKey=' + API_KEY)
        for source in sources
    ]

    list_of_words = ['cop', 'cops', 'crime', 'law enforcement', 'homicide',
                     'crime rate', 'white collar crime', 'blue collar crime']

    mention_count = dict.fromkeys(list_of_words, 0)
    # build each set separately; fromkeys(list_of_words, set()) would share
    # one set object across every key
    link_mentions = {word: set() for word in list_of_words}

    # gather the articles from every source; calling dict.update on the raw
    # responses would overwrite the 'articles' key, keeping only the last one
    articles = []
    for resp in responses:
        articles.extend(resp.json().get('articles', []))

    for article in articles:
        for word in list_of_words:
            title_found = found((article['title'] or '').find(word))
            description_found = found((article['description'] or '').find(word))
            if xor(title_found, description_found):
                mention_count[word] += 1
                link_mentions[word].add(article['url'])

    link_mentions = {key: list(link_mentions[key]) for key in link_mentions}
    mention_count["timestamp"] = str(datetime.now())
    link_mentions["timestamp"] = str(datetime.now())
    return mention_count, link_mentions


@app.route("/api", methods=["GET", "POST"])
def api():
    mention_count, link_mentions = fetch_news()
    return jsonify(mention_count)


@app.route("/", methods=["GET", "POST"])
def index():
    return render_template("index.html")
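A quick smoke test for the endpoint, assuming the dev server from the run script at the end of this diff is listening on its default http://localhost:5000:

    # sketch: hit the /api endpoint and inspect the keyword counts
    import requests

    counts = requests.get('http://localhost:5000/api').json()
    print(counts['cops'], counts['timestamp'])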
@@ -0,0 +1,3 @@
from app import manager

manager.run()
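With the Manager wiring in the first file enabled, this exposes the custom REPL on the command line; assuming this file is saved as manage.py, the shell starts with:

    python manage.py shell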
@@ -0,0 +1,30 @@
boto
keras
elasticsearch
zipcode
gunicorn
geopy
usaddress
statsmodels
scipy
Flask
Flask-Cors
Flask-Migrate
Flask-Script
Flask-SQLAlchemy
Jinja2
lxml
matplotlib
nose
num2words
pandas
patsy
pbr
plotly
psycopg2
requests
SQLAlchemy
Werkzeug
twilio
geopandas
shapely
@@ -0,0 +1,3 @@
from app import app

app.run(debug=True)
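Running this script starts the Flask development server on http://localhost:5000 (Flask's default), which is the address the front-end JavaScript and the API smoke test above assume. debug=True enables the interactive debugger and reloader, so it should be dropped outside local development.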