Source code for webcrawler.analyze

#!/usr/bin/python
"""Analyze module: reads house CSV data and plots it as a scatter plot and a box plot.
"""
import os
import warnings
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Select matplotlib's non-interactive 'agg' backend; the plots in this module
# are only ever written to files with savefig.
plt.switch_backend('agg')
# Silence every RuntimeWarning for the whole process.
# NOTE(review): presumably aimed at numeric warnings (e.g. from the Price/Size
# division) -- confirm, since this also hides unrelated RuntimeWarnings.
warnings.filterwarnings("ignore", category=RuntimeWarning)

def graph_scatter(frame, output_path="../other/size_price.png"):
    """CSV data scatter plot.

    Plots are made from the house price and size: one point per row of
    ``frame``, with "Size" on the x axis and "Price" on the y axis.

    Args:
        frame: DataFrame providing the "Size" and "Price" columns.
        output_path: Where the PNG is written. The default is relative to
            the current working directory.

    Side effects:
        Prints ``frame`` to stdout and writes the image to ``output_path``.
        The figure is left open; the caller is responsible for closing it.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(frame)
    fig, graph = plt.subplots()
    graph.set_xlabel('Size: sqft/acres', size=12)
    graph.set_ylabel('Prices', size=12)
    graph.set_title("House's Size and Price")
    # One random value per row, mapped to a color by matplotlib's colormap.
    color = np.random.random(len(frame))
    graph.scatter(x=frame["Size"], y=frame["Price"], c=color)
    # Save through the figure handle rather than pyplot's implicit
    # "current figure" so the right figure is always written.
    fig.savefig(output_path)
#img = Image.open("size_price.png") #img.show()
def graph_boxplot(frame, output_path="../other/ratio.png"):
    """CSV data box plot.

    Plots are made from the house ratios (price/size) and grouped by year.

    Args:
        frame: DataFrame providing the "Price", "Size" and "Year" columns.
            A "Ratio" column (Price / Size) is added to it in place.
        output_path: Where the PNG is written. The default is relative to
            the current working directory.

    Side effects:
        Mutates ``frame`` (adds "Ratio"), prints it to stdout and writes the
        current figure to ``output_path``.
    """
    frame["Ratio"] = frame["Price"] / frame["Size"]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(frame)
    try:
        graph = frame.boxplot(column="Ratio", by="Year")
        # boxplot(by="Year") already labels each box with its year. Only
        # rotate those labels: overwriting them with the per-row "Year"
        # column would attach the wrong year to the boxes (or fail when the
        # number of rows differs from the number of groups).
        for label in graph.get_xticklabels():
            label.set_rotation(90)
        # Fixed y-range; ratios outside it are not visible in the image.
        graph.set_ylim(-10, 175)
    except RuntimeError as err:
        # Plotting stays best-effort, but the failure is no longer silent.
        warnings.warn("Box plot could not be drawn completely: %s" % err)
    plt.savefig(output_path)
#img = Image.open("../ratio.png") #img.show()
def main():
    """Analyze and create a scatter and box plot.

    Reads every ``.csv`` file in ``./csv`` (no header row; the columns are
    named Address, Year, Size, Price), concatenates them into one frame and
    passes that frame to ``graph_scatter`` and ``graph_boxplot``.

    Returns early, after printing a message, when the directory contains no
    ``.csv`` files.
    """
    rec_columns = ['Address', 'Year', 'Size', 'Price']  # Records columns.
    # Match on the extension only: a substring test would also pick up names
    # such as "data.csv.bak". Sorted so the row order is reproducible.
    csv_names = sorted(name for name in os.listdir("./csv")
                       if name.endswith(".csv"))
    try:
        # The plot functions save to "../other/..." by default, which is
        # relative to the working directory, so the switch into ./csv is
        # kept.
        os.chdir("./csv")
    except OSError:
        pass
    if not csv_names:
        # pd.concat raises ValueError on an empty list; stop cleanly instead.
        print("No .csv files found in ./csv; nothing to plot.")
        return
    records = [pd.read_csv(name, header=None, names=rec_columns)
               for name in csv_names]
    frame = pd.concat(records)
    graph_scatter(frame)
    # Close the scatter figure before the box plot is drawn.
    plt.close()
    graph_boxplot(frame)
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()