Fitting Raw 2D Data Points to a Linear Equation (y = mx+b)

Stephen Lukacs (2) iquanta.org/instruct/python
Designed to upload LabQuest2 text (txt) outputs...
OR, manually enter data below...
(x, y) Data Entry.. .

Enter for example "Run 1" at the top of each run or trial.
Column for X Series: Column for Y Series:
, or, just Upload to run the demonstration.
lecture by Stephen Lukacs, Ph.D., ©2011 - 2023; updated: March 7, 2023. all data confirmed via lecture_data_analysis.nb.
"""
reference: https://iquanta.org/instruct/python ::: Statistics 2: Linear Regression ::: Stephen Lukacs, Ph.D. ©2023-01-06
"""
from py4web import URL, request
from ombott.request_pkg.helpers import FileUpload
from yatl.helpers import *
from iquanta.mcp import is_str_float, is_str_int, str_to_float, str_to_int, extra_x
from copy import copy
from numpy import mean, std, polyfit
import plotly.graph_objects as go

<page_scripts>
<style>
input[type=text] { height: 30px; width: 70px; text-align: center; border-radius: 7px; }
input[type=file]::file-selector-button { width: 170px; border-radius: 7px; }
textarea { margin: 0px 2px; width: 410px; height: 230px; font-size: 12pt; border-radius: 5px; }
p { margin: 2px 0px; padding: 8px; border-radius: 10px; border: 2px solid silver; }
div.error { display: block; color: red; font-weight: bold; font-size: 16pt; background-color: white; }
</style>
</page_scripts>

# Shorthand for emitting a self-closing <br/> element through the TAG helper.
BR = TAG['br/']
# Demonstration dataset: one "x, y" pair per line, newline-separated.  It pre-fills the
# data-entry textarea whenever no submitted text is available, so pressing the submit
# button without typing anything runs the analysis on these points.
demo_file = "0, 1.76\n1, 1.81\n2.04, 1.87\n2.97, 1.92\n4, 1.97\n5.1, 2.04\n6.08, 2.11\n7.1, 2.16\n8.1, 2.26\n9.16, 2.35\n10.08, 2.42\n11.17, 2.52\n12.1, 2.60\n13.2, 2.80\n14.16, 2.96\n15.1, 3.39\n15.6, 3.51\n16.1, 3.99\n16.5, 5.16\n17.07, 5.53\n17.5, 5.82\n18.1, 6.01\n19.2, 6.21\n20.5, 6.42\n21.5, 6.57\n22.06, 6.66\n22.51, 6.71\n23.18, 6.78\n23.66, 6.83\n24.27, 6.89\n25.37, 7.01\n26.4, 7.12\n27.47, 7.23\n28.02, 7.29\n29.04, 7.42\n29.7, 7.51\n30.3, 7.62\n30.81, 7.72\n31.37, 7.84\n31.6, 7.89\n31.91, 7.98\n32.33, 8.14\n32.66, 8.31\n33.1, 8.54\n33.31, 8.99\n33.58, 9.35\n33.81, 9.68\n34.22, 10.01\n34.98, 10.45\n35.5, 10.68\n36.11, 10.84\n36.62, 10.96\n37.68, 11.13\n38.81, 11.28\n40.02, 11.40\n41.2, 11.49\n41.58, 11.51\n42, 11.54\n42.37, 11.56\n43, 11.60\n43.34, 11.61\n44.08, 11.65\n45, 11.70\n45.91, 11.74\n47.03, 11.79\n" #phosphoric acid
#demo_file = "Run 1\n0, -1\n1, 294\n2, 523\n3, 697\n4, 842\n5, 1033\n6, 1213\n7, 1368\n8, 1538\n9, 1707\n10, 1774\n11, 1918\n12, 2058\n13, 2230\n14, 2382\n15, 2507\n16, 2654\n17, 2810\n18, 2936\n19, 3078\n20, 3224\n21, 3358\n22, 3471\n23, 3615\n24, 3726\n25, 3827\n\nRun 2\n0, -1\n1, 144\n2, 233\n3, 381\n4, 521\n5, 648\n6, 805\n7, 893\n8, 1057\n9, 1170\n10, 1266\n11, 1417\n12, 1498\n13, 1580\n14, 1769\n15, 1888\n16, 2007\n17, 2128\n18, 2268\n19, 2417\n20, 2549\n21, 2689\n22, 2813\n23, 2934\n24, 3073\n25, 3200\n\nRun 3\n0, -1\n1, 76\n2, 160\n3, 251\n4, 338\n5, 426\n6, 549\n7, 641\n8, 731\n9, 818\n10, 892\n11, 977\n12, 1046\n13, 1114\n14, 1196\n15, 1268\n16, 1353\n17, 1447\n18, 1530\n19, 1605\n20, 1677\n21, 1759\n22, 1834\n23, 1912\n24, 2006\n25, 2085\n"

# Read the submitted data, if any.  Two sources are accepted, in this order:
#   1. an uploaded plain-text file (form field 'labQ2file') that contains the Vernier tag line;
#   2. text typed or pasted into the textarea (form field 'txtfile').
# On success `txttype` describes the source and `txt` holds the text with '\r' removed and
# surrounding whitespace stripped; both stay None when nothing usable was submitted.
# `xseries`/`yseries` (1-based column selectors) are bound only when data was accepted.
vernier_tag_line, vernier_file_type = 'Vernier Format 2', '(LabQuest2 batch datafile)'
rtn, txttype, txt, tmp = FORM(_action=None, _method="post", _enctype="multipart/form-data"), None, None, None
upload = request.POST.get('labQ2file')
if upload and isinstance(upload, FileUpload) and (upload.content_type.value == 'text/plain'):
    tmp = upload.file.read()
    #search the raw bytes directly instead of the repr produced by str(bytes)
    if isinstance(tmp, bytes) and (vernier_tag_line.encode('utf-8') in tmp):
        #labQ2file from file input file; an uploaded file always starts from columns 1 and 2.
        #errors='replace' keeps a stray non-UTF-8 byte from aborting the whole request.
        txttype, txt, xseries, yseries = vernier_file_type, tmp.decode('utf-8', errors='replace').replace('\r', "").strip(), 1, 2
elif request.forms.get('txtfile') and isinstance(request.forms.get('txtfile'), str):
    tmp = request.forms.get('txtfile')
    if (tmp.find(vernier_tag_line) > -1):
        #labQ2file pasted into txtfile input from like notepad
        txttype, txt = vernier_file_type, tmp.replace('\r', "").strip()
    else:
        #raw text; the original tested an undefined name `txtfile` here, which raised NameError
        txttype, txt = '(manually-typed %s data)' % ('batch' if (tmp.upper().find('RUN') > -1) else 'list'), tmp.replace('\r', "").strip()
    #honor the column selectors typed into the form; fall back to columns 1 and 2 when they are blank or absent
    xseries, yseries = (request.forms.get('xseries') or 1), (request.forms.get('yseries') or 2)
del tmp, upload

# Measurement types.  Each key is an upper-case fragment that is searched for inside an
# upper-cased column name; each value is a [name, units, symbol] list describing the axis.
# Entry order matters: when several keys occur in the same column name, the later entry wins.
mtypes = {
    'PH':   ["pH", "", "pH"],
    'RED':  ["Redox", "mV", "V"],
    'COND': ["Conductivity", "µS/cm", "L"],
    'PRES': ["Pressure", "kPa", "P"],
    'VOL':  ["Volume", "mL", "V"],
    'TEMP': ["Temperature", "°C", "T"],
    'TK':   ["Temperature", "K", "T"],
    'DROP': ["Drops", "#", ""],
}

# Parse the submitted text into `data`, a list of runs.  Each run is a dict:
#   'title' : plot title (str)
#   'axes'  : one [name, units, symbol] list per data column
#   'data'  : list of row tuples of floats
# `data` is bound only when something was submitted; later code tests for its existence.
if txttype:
    if ('LabQuest2' in txttype):
        #header_line is the index of the latest 'Vernier Format 2' line (None before the first one);
        #it is switched to the sentinel 999999999 once the numeric rows of a run begin.
        header_line, data, title = None, [ ], ""
        for i, l in enumerate(txt.split('\n')):
            if (l[:16] == 'Vernier Format 2'):
                header_line = i
            elif (header_line is None):
                continue #text ahead of the first header; header_line+1 on None used to raise TypeError
            elif (i == header_line+1):
                title = l.strip()
            elif (i == header_line+2) and (l[:4] == 'Run '):
                data.append({ 'title':title, 'axes':[ ], 'data':[ ] })
            elif (not data):
                continue #no 'Run ' line has opened a run yet, so there is nothing to attach this line to
            elif (i == header_line+3): #type of measurement line
                for name in l.rstrip().upper().replace('\t', ',').split(','):
                    #the last matching measurement type wins, as in the mtypes ordering
                    matches = [ v for k, v in mtypes.items() if (k in name) ]
                    data[-1]['axes'].append(copy(matches[-1]) if matches else ["","",""])
            elif (i == header_line+5): #units line
                axes = data[-1]['axes']
                for j, unit in enumerate(l.rstrip().replace('\t', ',').split(',')):
                    if (j >= len(axes)):
                        axes.append(["","",""]) #more unit columns than name columns
                    if (unit == "°C"):
                        axes[j] = copy(mtypes['TEMP'])
                    elif (unit == "K"):
                        axes[j] = copy(mtypes['TK'])
                    elif (len(unit) > 0):
                        axes[j][1] = unit
            elif (i == header_line+6) and (len(l.strip()) == 0):
                header_line = 999999999
            elif (header_line == 999999999) and (l.find('\t') > -1):
                cells = [ d.strip() for d in l.strip().replace('\t', ',').split(',') ]
                if (len(cells) > 1) and all(is_str_float(d) for d in cells):
                    data[-1]['data'].append(tuple( str_to_float(d) for d in cells ))
    elif ('manual' in txttype):
        #Build every run from scratch: a shallow copy of one template dict would make all runs
        #share the same 'axes' and 'data' lists, so each run would end up holding every row.
        new_run = lambda: { 'title':"Ambiguous Ill-Defined Plot Title", 'axes':[["Independent Variable","","X",], ["Dependent Variable","","Y",]], 'data':[ ] }
        data = [ ]
        for l in txt.split('\n'):
            cells = [ d.strip() for d in l.strip().replace('\t', ',').split(',') ]
            if (cells[0].upper().find('RUN') > -1) or (cells[0].upper().find('TRIAL') > -1):
                data.append(new_run()) #a "Run ..." or "Trial ..." line starts a new run
            elif (len(cells) > 1) and all(is_str_float(d) for d in cells):
                if (not data):
                    data.append(new_run()) #numeric rows that appear before any Run/Trial header
                data[-1]['data'].append(tuple( str_to_float(d) for d in cells ))
        if (not data):
            data.append(new_run()) #keep at least one (empty) run so later code can read data[0]
        del(new_run)

# Assemble the data-entry form from three panels followed by a float-clearing DIV.

# Panel 1: LabQuest2 file upload.
upload_panel = DIV(
    "Designed to upload LabQuest2 text (txt) outputs...",
    BR(),
    INPUT(_type="file", _name="labQ2file", _style="width:500px; font-size: 12pt; background-color:yellow;"),
    _style="background-color:none;",
)

# Panel 2: manual entry, pre-filled with the submitted text or, failing that, the demo data.
entry_panel = DIV(
    XML("<b>OR</b>, manually enter data below..."),
    BR(),
    "(x, y) Data Entry.. .",
    BR(),
    TEXTAREA(txt if txt else demo_file, _name="txtfile"),
    BR(),
    'Enter for example "Run 1" at the top of each run or trial.',
    _style="float:left; max-width:440px; margin-right:12px; background-color:none;",
)

# Panel 3: column selectors, the submit button and, once text was accepted, a note naming its source.
x_column = xseries if ('xseries' in locals()) else 1
y_column = yseries if ('yseries' in locals()) else 2
if (txt is None):
    source_note = ""
else:
    source_note = CAT(" ", SPAN(txttype, _style="font-weight:bold; color:maroon;"))
select_panel = DIV(
    "Column for X Series:",
    INPUT(_type="text", _class="integer", _name="xseries", _value=x_column),
    XML("&emsp;"),
    "Column for Y Series:",
    INPUT(_type="text", _class="integer", _name="yseries", _value=y_column),
    BR(),
    INPUT(_type="submit", _value="Upload Data"),
    ", or, just Upload to run the demonstration.",
    source_note,
    _style="float:left; max-width:800px; background-color:none;",
)

rtn.append(CAT(DIV(upload_panel, entry_panel, select_panel, DIV(_style="float:none; clear:both;"))))

def doFit(x, y, deg=1):
    """Least-squares polynomial fit of y against x, with its coefficient of determination.

    Parameters:
        x, y: equal-length sequences of numbers.
        deg:  degree of the fitted polynomial (default 1, i.e. a straight line).

    Returns:
        (coefficients, r_squared).  The coefficients are those returned by numpy.polyfit,
        highest power first, so (m, b) for deg=1.  r_squared is 1 - SSres/SStot; it is NaN
        when y has no variance (SStot == 0), where R-squared is undefined.
    """
    coeffs, residuals = polyfit(x, y, deg, full=True)[:2]
    ymean = mean(y)
    #RSquared verified with Mathematica LinearModelFit["RSquared"]
    if (len(residuals) > 0):
        SSres = residuals[0]
    else:
        #numpy returns an empty residuals array for exactly determined or rank-deficient
        #systems (e.g. two points for a line), so evaluate the residual sum directly.
        SSres = sum((yi - sum(c * xi**p for p, c in enumerate(coeffs[::-1])))**2 for xi, yi in zip(x, y))
    SStot = sum((d - ymean)**2 for d in y)
    if (SStot == 0):
        return coeffs, float('nan')
    return coeffs, (1 - SSres / SStot)

# Fit a straight line to every parsed run and render the points, the fitted lines and the
# fitted equations.  Runs only when the parsing step bound `data`.
if ('data' in locals()):
    linear = lambda x, m, b: m*x + b
    # NOTE(review): this writes the raw parsed structure onto the page; it looks like leftover
    # debugging output — confirm before removing.
    rtn.append(str(data))
    pcolors = ('red', 'orange', 'blue', 'green', 'purple',) #plotly colors
    #1-based column selectors: use the values bound while reading the submission when present,
    #otherwise read them from the form, defaulting to columns 1 and 2.
    xsel = xseries if ('xseries' in locals()) else (request.forms.get('xseries') or 1)
    ysel = yseries if ('yseries' in locals()) else (request.forms.get('yseries') or 2)
    try:
        ix, iy = str_to_int(xsel)-1, str_to_int(ysel)-1
        if (ix < 0) or (iy < 0):
            raise ValueError("column numbers start at 1") #a negative index would silently wrap around
    except (TypeError, ValueError) as E:
        ix, iy = 0, 1
        rtn.append(DIV(f'Could not read the X/Y column numbers; using columns 1 and 2.  Message: "{E}"', _class="error"))
    #axis descriptions come from the first run; an empty list lets the handlers below report the problem
    ir = data[0]['axes'] if data else [ ]
    #label layout: name, then ", symbol" and " (units)" when those parts are non-empty
    axis_label = lambda a: f"{a[0]}{', '+a[2] if (a[2] != '') else ''}{' ('+a[1]+')' if (a[1] != '') else ''}"
    xaxis, yaxis = "", ""
    try:
        xaxis = axis_label(ir[ix])
    except Exception as E:
        xaxis = ""
        rtn.append(DIV(f'Could not process the X-Axis.  Message: "{E}"', _class="error"))
    try:
        yaxis = axis_label(ir[iy])
    except Exception as E:
        yaxis = ""
        rtn.append(DIV(f'Could not process the Y-Axis.  Message: "{E}"', _class="error"))
    try:
        fig = go.Figure()
        for i, run in enumerate(data):
            color = pcolors[i % len(pcolors)] #cycle the palette instead of failing beyond five runs
            columns = list(zip(*run['data']))
            if (len(columns) <= max(ix, iy)):
                #an empty run or a missing column must not prevent the other runs from being plotted
                rtn.append(DIV(f'Run/Trial #{i+1} has no data in the selected columns and was skipped.', _class="error"))
                continue
            x, y = columns[ix], columns[iy]
            fig.add_trace(go.Scatter(x=x, y=y, mode='markers', name=f"Run #{i+1}", marker=go.scatter.Marker(color=color)))
            mb, RSquared = doFit(x, y)
            xsym, ysym = (run['axes'][ix][2] or 'X'), (run['axes'][iy][2] or 'Y')
            rtn.append(DIV(f"Equation of a Line for Run/Trial #{i+1}: ", SPAN(f" {ysym} = {mb[0]:#,.5g}*{xsym} + {mb[1]:#,.5g}", _class="eq", _style=f"color:{color};"), XML(f", R<sup>2</sup> = {RSquared:.5f}"), _class="eq"))
            #draw the fitted line across the range of the selected X column (it was always column 0 before)
            xss = [min(x), max(x)]
            fig.add_trace(go.Scatter(x=xss, y=[linear(d, *mb) for d in xss], mode='lines', name=f"Linear Fit #{i+1}", marker=go.scatter.Marker(color=color)))
        #go.layout.XAxis/YAxis are the current spellings of the deprecated go.XAxis/go.YAxis
        fig.update_layout(xaxis=go.layout.XAxis(title=xaxis), yaxis=go.layout.YAxis(title=yaxis, anchor='x', side='left'), yaxis2=go.layout.YAxis(title="", anchor='x', overlaying='y', side='right'))
        fig.update_layout(title=data[0]['title'], height=750, margin=go.layout.Margin(l=25, r=25, b=60, t=60, pad=0), plot_bgcolor="#f5f5f5", paper_bgcolor="White", showlegend=True)
        html = fig.to_html()
        #keep only the span from the first '<div>' to the last '</div>' of the generated page and tag the wrapper with an id
        rtn.append(XML(html[html.find('<div>'):html.rfind('</div>')+6].replace('<div>', '<div id="plotly">')))
    except Exception as E:
        rtn.append(DIV(f'Could not process or display the plot.  Message: "{E}"', _class="error"))
# Page footer: attribution text followed by a link, labelled with the notebook name, that
# opens a static PDF in the "data_analysis" window/tab.
notebook_link = A(
    "lecture_data_analysis.nb",
    _href=URL('static', "pdf/lecture_data_analysis8.pdf"),
    _target="data_analysis",
)
rtn.append(CAT(
    "lecture by Stephen Lukacs, Ph.D., ©2011 - 2023; updated: March 7, 2023.  all data confirmed via ",
    notebook_link,
    ".",
))