The list of Python notebooks is obtained by running the following script:
bash << 'EOF'
# Timestamp recorded in the header line of the listing.
run_stamp=$(date +"%Y-%m-%d %H:%M:%S")
# Every notebook in the bayesian-pca code directory, one path per line.
notebooks=$(ls /home/chris-jakoolit/christopherpaine_org/_code/bayesian-pca/*.ipynb)
# Header, separator and listing, wrapped in a markdown code fence.
report=$(printf '```\nthis command was run on %s:\n------------------------\n%s\n```' "${run_stamp}" "${notebooks}")
# Show the report on the terminal and also place it on the Wayland clipboard.
printf '%s\n' "${report}"
printf '%s\n' "${report}" | wl-copy
EOF
this command was run on 2026-03-16 13:55:20:
------------------------
/home/chris-jakoolit/christopherpaine_org/_code/bayesian-pca/lets-go-baby.ipynb
/home/chris-jakoolit/christopherpaine_org/_code/bayesian-pca/PCA_Spot_Yields.ipynb
/home/chris-jakoolit/christopherpaine_org/_code/bayesian-pca/scratchpad.ipynb
/home/chris-jakoolit/christopherpaine_org/_code/bayesian-pca/Untitled.ipynb
scratchpad.ipynb
{
"cells": [
{
"cell_type": "markdown",
"id": "96d34c17-0a1d-44f7-baff-b7e016aacfbe",
"metadata": {},
"source": [
"# Getting Raw Data into Dataframes\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "affa3aab-495d-4dc6-aa5d-4856527174ab",
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown, HTML\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "2d5e9fc9-a976-4f82-884a-7d45a498262a",
"metadata": {},
"source": [
"## Import Spot Yield Data and Perform Basic Reasonableness Check"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "71e1ae0a-efdf-4d40-a09f-667ef726aaa6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"SPOT_SHEET = \"4. spot curve\"  # worksheet that holds the spot curve in both workbooks\n",
"\n",
"\n",
"def load_spot_curve(workbook):\n",
"    \"\"\"Read the spot-curve sheet of one workbook into a labelled DataFrame.\n",
"\n",
"    The single row read after skipping 3 rows provides the column labels, with\n",
"    its first cell replaced by 'Date'. The data itself is everything after the\n",
"    first 5 rows of the sheet.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    workbook : str\n",
"        Path of the .xlsx file to read.\n",
"\n",
"    Returns\n",
"    -------\n",
"    frame : pandas.DataFrame\n",
"        The sheet data with the header row applied as column labels.\n",
"    header : pandas.DataFrame\n",
"        The one-row frame the column labels were taken from.\n",
"    \"\"\"\n",
"    shared_options = dict(sheet_name=SPOT_SHEET, engine=\"openpyxl\", header=None)\n",
"    # create an appropriate set of headers\n",
"    header = pd.read_excel(workbook, skiprows=3, nrows=1, **shared_options)\n",
"    header[0] = \"Date\"\n",
"    frame = pd.read_excel(workbook, skiprows=5, **shared_options)\n",
"    frame.columns = header.iloc[0]\n",
"    return frame, header\n",
"\n",
"\n",
"# load in first spreadsheet to df1 (col_names keeps the header row it was labelled with)\n",
"df1, col_names = load_spot_curve(\"GLC Nominal month end data_1970 to 2015.xlsx\")\n",
"# load in second spreadsheet to df2\n",
"df2, col_names2 = load_spot_curve(\"GLC Nominal month end data_2016 to present.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3df351e1-1722-4a86-b6e2-1b7e5b7904a8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The length of combined dataframe is 660 rows\n"
]
}
],
"source": [
"#join the two dataframes to create df\n",
"df = pd.concat([df1, df2], ignore_index=True)\n",
"print(\"The length of combined dataframe is \"+str(len(df))+\" rows\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5f4d3d71-0b21-4aab-afad-fd42bd074527",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"**Checking Dataframe 1 - 1970 to 2015**"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"the first dates is 1970-01-31 and the last is 2015-12-31\n",
"one would therefore expect 12 x 46yrs = 552 entries\n",
"and indeed we see the number of rows in df is 552\n"
]
}
],
"source": [
"#producing some sense checks\n",
"display(Markdown(\"**Checking Dataframe 1 - 1970 to 2015**\")) \n",
"print(\"the first dates is \"+ str(df.iloc[0,0].strftime('%Y-%m-%d'))+\" and the last is \" +str(df.iloc[551,0].strftime('%Y-%m-%d') ))\n",
"print(\"one would therefore expect 12 x 46yrs = 552 entries\")\n",
"print(\"and indeed we see the number of rows in df is \"+str(len(df1)))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f55dcde1-6fb3-488b-bdc6-d91773e78b18",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"**Checking Dataframe 2 - 2015 to present**"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"the first dates is 2016-01-31 and the last is 2024-12-31\n",
"one would therefore expect 12 x 9yrs = 108 entries\n",
"and indeed we see the number of rows in df is 108\n"
]
}
],
"source": [
"display(Markdown(\"**Checking Dataframe 2 - 2015 to present**\")) \n",
"print(\"the first dates is \"+ str(df.iloc[552,0].strftime('%Y-%m-%d'))+\" and the last is \" +str(df.iloc[659,0].strftime('%Y-%m-%d') ))\n",
"print(\"one would therefore expect 12 x 9yrs = 108 entries\")\n",
"print(\"and indeed we see the number of rows in df is \"+str(len(df2)))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "20129d73-6936-4e79-b2b9-85a56a5cce17",
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"**sum of values check**"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"manual inspection of the sum of all values in first spreadsheet is 191503.172322029\n",
"the sume of 1st dataframe is also 191503.17232202887\n"
]
},
{
"data": {
"text/html": [
"<span style='color:red;'>the very minor differences are because ....</span>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"the sum of all values in second spreadsheet is 17844.9993308767\n",
"the sume of 1st dataframe is also 17844.999330876683\n"
]
},
{
"data": {
"text/html": [
"<span style='color:red;'>the very minor differences are because ....</span>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"the sum of combined dataframe is 209348.17165290558\n",
"and the sum of the manually observed 191503.172322029 + 17844.9993308767 = 209348.1716529057\n"
]
}
],
"source": [
"display(Markdown(\"**sum of values check**\")) \n",
"print(\"manual inspection of the sum of all values in first spreadsheet is 191503.172322029\")\n",
"print(\"the sume of 1st dataframe is also \" + str(df1.iloc[:, 1:].sum().sum()))\n",
"display(HTML(\"<span style='color:red;'>the very minor differences are because ....</span>\"))\n",
"print(\"the sum of all values in second spreadsheet is 17844.9993308767\")\n",
"print(\"the sume of 1st dataframe is also \" + str(df2.iloc[:, 1:].sum().sum()))\n",
"display(HTML(\"<span style='color:red;'>the very minor differences are because ....</span>\"))\n",
"print(\"the sum of combined dataframe is \" + str(df.iloc[:, 1:].sum().sum()))\n",
"combined_total = 191503.172322029 + 17844.9993308767\n",
"print(\"and the sum of the manually observed 191503.172322029 + 17844.9993308767 = \" + str(combined_total))"
]
},
{
"cell_type": "markdown",
"id": "b87e47b9-6475-46af-9eb8-7ddcfb988bf9",
"metadata": {},
"source": [
"## Calculate Spot Yield Differences"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b123635b-fdde-4f86-9a41-51b605fe0b95",
"metadata": {},
"outputs": [],
"source": [
"# np.log raised errors on this data, so look for cells that are not plain numbers\n",
"# (for example text or blanks picked up by the Excel import).\n",
"# Only the yield columns are meant to be numeric, so leave the Date column out.\n",
"df_numeric = df.drop(columns='Date', errors='ignore')\n",
"# errors='coerce' turns anything that cannot be parsed as a number into NaN\n",
"df_converted = df_numeric.apply(pd.to_numeric, errors='coerce')\n",
"# A cell is a problem only if it held a value that became NaN after coercion.\n",
"# (Comparing the frames with .ne() also flagged every empty cell, because NaN != NaN.)\n",
"mask_problem = df_numeric.notna() & df_converted.isna()\n",
"# the offending values in place; every other cell is NaN\n",
"filtered = df_numeric[mask_problem]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "742c7f47-5aa7-43eb-995f-fef7a9483936",
"metadata": {},
"outputs": [],
"source": [
"# filtered keeps the full row index of df_numeric (cells that are not flagged are NaN),\n",
"# so drop the all-NaN rows to list only the rows that hold a flagged, non-null value.\n",
"print(filtered.dropna(how=\"all\").index)\n",
"print(df_numeric.index)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "8d4ea54b-53d2-42cd-8a90-b7f638e7f8ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(209348.17165290558)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[:,1:].sum().sum()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "fab0cfbe-bdb0-4cb6-be7e-cd4760f0662c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/nix/store/grz7lgv2i6x8335n1pfl38x906vc2iby-python3-3.12.10-env/lib/python3.12/site-packages/pandas/core/internals/blocks.py:393: RuntimeWarning: invalid value encountered in log\n",
" result = func(self.values, **kwargs)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0.5</th>\n",
" <th>1.0</th>\n",
" <th>1.5</th>\n",
" <th>2.0</th>\n",
" <th>2.5</th>\n",
" <th>3.0</th>\n",
" <th>3.5</th>\n",
" <th>4.0</th>\n",
" <th>4.5</th>\n",
" <th>5.0</th>\n",
" <th>...</th>\n",
" <th>35.5</th>\n",
" <th>36.0</th>\n",
" <th>36.5</th>\n",
" <th>37.0</th>\n",
" <th>37.5</th>\n",
" <th>38.0</th>\n",
" <th>38.5</th>\n",
" <th>39.0</th>\n",
" <th>39.5</th>\n",
" <th>40.0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NaN</td>\n",
" <td>2.155865</td>\n",
" <td>2.164177</td>\n",
" <td>2.163407</td>\n",
" <td>2.159182</td>\n",
" <td>2.153934</td>\n",
" <td>2.148557</td>\n",
" <td>2.143398</td>\n",
" <td>2.138608</td>\n",
" <td>2.134239</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" <td>2.129794</td>\n",
" <td>2.127907</td>\n",
" <td>2.124743</td>\n",
" <td>2.120779</td>\n",
" <td>2.116447</td>\n",
" <td>2.112078</td>\n",
" <td>2.107884</td>\n",
" <td>2.103977</td>\n",
" <td>2.100422</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>2.046943</td>\n",
" <td>2.051911</td>\n",
" <td>2.053485</td>\n",
" <td>2.053239</td>\n",
" <td>2.052194</td>\n",
" <td>2.050950</td>\n",
" <td>2.049814</td>\n",
" <td>2.048926</td>\n",
" <td>2.048347</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>2.029005</td>\n",
" <td>2.062340</td>\n",
" <td>2.076126</td>\n",
" <td>2.079747</td>\n",
" <td>2.078543</td>\n",
" <td>2.075374</td>\n",
" <td>2.071704</td>\n",
" <td>2.068308</td>\n",
" <td>2.065603</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>2.000277</td>\n",
" <td>2.045864</td>\n",
" <td>2.062064</td>\n",
" <td>2.064012</td>\n",
" <td>2.059325</td>\n",
" <td>2.051845</td>\n",
" <td>2.043562</td>\n",
" <td>2.035601</td>\n",
" <td>2.028615</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>655</th>\n",
" <td>1.455826</td>\n",
" <td>1.439353</td>\n",
" <td>1.402119</td>\n",
" <td>1.375484</td>\n",
" <td>1.356925</td>\n",
" <td>1.343494</td>\n",
" <td>1.333993</td>\n",
" <td>1.327756</td>\n",
" <td>1.324277</td>\n",
" <td>1.323134</td>\n",
" <td>...</td>\n",
" <td>1.499410</td>\n",
" <td>1.497142</td>\n",
" <td>1.494746</td>\n",
" <td>1.492218</td>\n",
" <td>1.489558</td>\n",
" <td>1.486763</td>\n",
" <td>1.483831</td>\n",
" <td>1.480760</td>\n",
" <td>1.477550</td>\n",
" <td>1.474197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>656</th>\n",
" <td>1.477274</td>\n",
" <td>1.425788</td>\n",
" <td>1.376319</td>\n",
" <td>1.346838</td>\n",
" <td>1.329441</td>\n",
" <td>1.319028</td>\n",
" <td>1.313292</td>\n",
" <td>1.310951</td>\n",
" <td>1.311175</td>\n",
" <td>1.313381</td>\n",
" <td>...</td>\n",
" <td>1.515755</td>\n",
" <td>1.513311</td>\n",
" <td>1.510736</td>\n",
" <td>1.508030</td>\n",
" <td>1.505189</td>\n",
" <td>1.502213</td>\n",
" <td>1.499099</td>\n",
" <td>1.495847</td>\n",
" <td>1.492453</td>\n",
" <td>1.488918</td>\n",
" </tr>\n",
" <tr>\n",
" <th>657</th>\n",
" <td>1.509063</td>\n",
" <td>1.479084</td>\n",
" <td>1.457223</td>\n",
" <td>1.445040</td>\n",
" <td>1.436732</td>\n",
" <td>1.430826</td>\n",
" <td>1.427094</td>\n",
" <td>1.425345</td>\n",
" <td>1.425352</td>\n",
" <td>1.426876</td>\n",
" <td>...</td>\n",
" <td>1.562482</td>\n",
" <td>1.559861</td>\n",
" <td>1.557129</td>\n",
" <td>1.554288</td>\n",
" <td>1.551334</td>\n",
" <td>1.548267</td>\n",
" <td>1.545084</td>\n",
" <td>1.541785</td>\n",
" <td>1.538368</td>\n",
" <td>1.534832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658</th>\n",
" <td>1.480481</td>\n",
" <td>1.439974</td>\n",
" <td>1.416794</td>\n",
" <td>1.403691</td>\n",
" <td>1.394121</td>\n",
" <td>1.386840</td>\n",
" <td>1.381801</td>\n",
" <td>1.378954</td>\n",
" <td>1.378166</td>\n",
" <td>1.379234</td>\n",
" <td>...</td>\n",
" <td>1.543082</td>\n",
" <td>1.540410</td>\n",
" <td>1.537605</td>\n",
" <td>1.534664</td>\n",
" <td>1.531586</td>\n",
" <td>1.528368</td>\n",
" <td>1.525007</td>\n",
" <td>1.521501</td>\n",
" <td>1.517847</td>\n",
" <td>1.514042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>659</th>\n",
" <td>1.494679</td>\n",
" <td>1.460397</td>\n",
" <td>1.441731</td>\n",
" <td>1.434688</td>\n",
" <td>1.431337</td>\n",
" <td>1.429968</td>\n",
" <td>1.430298</td>\n",
" <td>1.432190</td>\n",
" <td>1.435510</td>\n",
" <td>1.440097</td>\n",
" <td>...</td>\n",
" <td>1.617167</td>\n",
" <td>1.614502</td>\n",
" <td>1.611717</td>\n",
" <td>1.608811</td>\n",
" <td>1.605781</td>\n",
" <td>1.602625</td>\n",
" <td>1.599343</td>\n",
" <td>1.595930</td>\n",
" <td>1.592387</td>\n",
" <td>1.588710</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>660 rows × 80 columns</p>\n",
"</div>"
],
"text/plain": [
"0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 \\\n",
"0 NaN 2.155865 2.164177 2.163407 2.159182 2.153934 2.148557 \n",
"1 NaN 2.129794 2.127907 2.124743 2.120779 2.116447 2.112078 \n",
"2 NaN 2.046943 2.051911 2.053485 2.053239 2.052194 2.050950 \n",
"3 NaN 2.029005 2.062340 2.076126 2.079747 2.078543 2.075374 \n",
"4 NaN 2.000277 2.045864 2.062064 2.064012 2.059325 2.051845 \n",
".. ... ... ... ... ... ... ... \n",
"655 1.455826 1.439353 1.402119 1.375484 1.356925 1.343494 1.333993 \n",
"656 1.477274 1.425788 1.376319 1.346838 1.329441 1.319028 1.313292 \n",
"657 1.509063 1.479084 1.457223 1.445040 1.436732 1.430826 1.427094 \n",
"658 1.480481 1.439974 1.416794 1.403691 1.394121 1.386840 1.381801 \n",
"659 1.494679 1.460397 1.441731 1.434688 1.431337 1.429968 1.430298 \n",
"\n",
"0 4.0 4.5 5.0 ... 35.5 36.0 36.5 \\\n",
"0 2.143398 2.138608 2.134239 ... NaN NaN NaN \n",
"1 2.107884 2.103977 2.100422 ... NaN NaN NaN \n",
"2 2.049814 2.048926 2.048347 ... NaN NaN NaN \n",
"3 2.071704 2.068308 2.065603 ... NaN NaN NaN \n",
"4 2.043562 2.035601 2.028615 ... NaN NaN NaN \n",
".. ... ... ... ... ... ... ... \n",
"655 1.327756 1.324277 1.323134 ... 1.499410 1.497142 1.494746 \n",
"656 1.310951 1.311175 1.313381 ... 1.515755 1.513311 1.510736 \n",
"657 1.425345 1.425352 1.426876 ... 1.562482 1.559861 1.557129 \n",
"658 1.378954 1.378166 1.379234 ... 1.543082 1.540410 1.537605 \n",
"659 1.432190 1.435510 1.440097 ... 1.617167 1.614502 1.611717 \n",
"\n",
"0 37.0 37.5 38.0 38.5 39.0 39.5 40.0 \n",
"0 NaN NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN NaN \n",
".. ... ... ... ... ... ... ... \n",
"655 1.492218 1.489558 1.486763 1.483831 1.480760 1.477550 1.474197 \n",
"656 1.508030 1.505189 1.502213 1.499099 1.495847 1.492453 1.488918 \n",
"657 1.554288 1.551334 1.548267 1.545084 1.541785 1.538368 1.534832 \n",
"658 1.534664 1.531586 1.528368 1.525007 1.521501 1.517847 1.514042 \n",
"659 1.608811 1.605781 1.602625 1.599343 1.595930 1.592387 1.588710 \n",
"\n",
"[660 rows x 80 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.log(df.iloc[:,1:])"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "089921ab-4e37-4661-a9eb-3da7cae5f52f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0\n",
"0.5 NaN\n",
"1.0 8.413131\n",
"1.5 8.397269\n",
"2.0 8.370748\n",
"2.5 8.337633\n",
" ... \n",
"38.0 NaN\n",
"38.5 NaN\n",
"39.0 NaN\n",
"39.5 NaN\n",
"40.0 NaN\n",
"Name: 1, Length: 80, dtype: object"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1,1:]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "a7663f0a-699f-4759-8f47-96735dfad6d9",
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "TypeError",
"evalue": "loop of ufunc does not support argument 0 of type numpy.float64 which has no callable log method",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
"\u001b[31mAttributeError\u001b[39m: 'numpy.float64' object has no attribute 'log'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[40]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlog\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m.\u001b[49m\u001b[43miloc\u001b[49m\u001b[43m[\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/nix/store/grz7lgv2i6x8335n1pfl38x906vc2iby-python3-3.12.10-env/lib/python3.12/site-packages/pandas/core/generic.py:2171\u001b[39m, in \u001b[36mNDFrame.__array_ufunc__\u001b[39m\u001b[34m(self, ufunc, method, *inputs, **kwargs)\u001b[39m\n\u001b[32m 2167\u001b[39m \u001b[38;5;129m@final\u001b[39m\n\u001b[32m 2168\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__array_ufunc__\u001b[39m(\n\u001b[32m 2169\u001b[39m \u001b[38;5;28mself\u001b[39m, ufunc: np.ufunc, method: \u001b[38;5;28mstr\u001b[39m, *inputs: Any, **kwargs: Any\n\u001b[32m 2170\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m2171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marraylike\u001b[49m\u001b[43m.\u001b[49m\u001b[43marray_ufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mufunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/nix/store/grz7lgv2i6x8335n1pfl38x906vc2iby-python3-3.12.10-env/lib/python3.12/site-packages/pandas/core/arraylike.py:399\u001b[39m, in \u001b[36marray_ufunc\u001b[39m\u001b[34m(self, ufunc, method, *inputs, **kwargs)\u001b[39m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.ndim == \u001b[32m1\u001b[39m:\n\u001b[32m 397\u001b[39m \u001b[38;5;66;03m# ufunc(series, ...)\u001b[39;00m\n\u001b[32m 398\u001b[39m inputs = \u001b[38;5;28mtuple\u001b[39m(extract_array(x, extract_numpy=\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m inputs)\n\u001b[32m--> \u001b[39m\u001b[32m399\u001b[39m result = \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mufunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 400\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 401\u001b[39m \u001b[38;5;66;03m# ufunc(dataframe)\u001b[39;00m\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m method == \u001b[33m\"\u001b[39m\u001b[33m__call__\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwargs:\n\u001b[32m 403\u001b[39m \u001b[38;5;66;03m# for np.<ufunc>(..) calls\u001b[39;00m\n\u001b[32m 404\u001b[39m \u001b[38;5;66;03m# kwargs cannot necessarily be handled block-by-block, so only\u001b[39;00m\n\u001b[32m 405\u001b[39m \u001b[38;5;66;03m# take this path if there are no kwargs\u001b[39;00m\n",
"\u001b[31mTypeError\u001b[39m: loop of ufunc does not support argument 0 of type numpy.float64 which has no callable log method"
]
}
],
"source": [
"np.log(df.iloc[1,1:])"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "ac62cdf6-496f-4cd3-94bb-54d1ba0cc5b7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0\n",
"0.5 <class 'numpy.float64'>\n",
"1.0 <class 'numpy.float64'>\n",
"1.5 <class 'numpy.float64'>\n",
"2.0 <class 'numpy.float64'>\n",
"2.5 <class 'numpy.float64'>\n",
" ... \n",
"38.0 <class 'numpy.float64'>\n",
"38.5 <class 'numpy.float64'>\n",
"39.0 <class 'numpy.float64'>\n",
"39.5 <class 'numpy.float64'>\n",
"40.0 <class 'numpy.float64'>\n",
"Name: 1, Length: 80, dtype: object"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1, 1:].apply(type)\n"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "792a5228-dc32-423c-935b-31a9d3433d9f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1\n",
"<class 'numpy.float64'> 80\n",
"Name: count, dtype: int64"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[1, 1:].apply(type).value_counts()\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "655cb9ff-d2b8-4aa0-a808-8e8af70d7d3b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"31\n",
"object\n",
"<class 'pandas.core.series.Series'>\n"
]
},
{
"ename": "TypeError",
"evalue": "loop of ufunc does not support argument 0 of type numpy.float64 which has no callable log method",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
"\u001b[31mAttributeError\u001b[39m: 'numpy.float64' object has no attribute 'log'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[43]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;28mprint\u001b[39m(row.dtypes) \u001b[38;5;66;03m# Check Series dtype\u001b[39;00m\n\u001b[32m 4\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mtype\u001b[39m(row)) \u001b[38;5;66;03m# Confirm it's a Series\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlog\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Try again\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/nix/store/grz7lgv2i6x8335n1pfl38x906vc2iby-python3-3.12.10-env/lib/python3.12/site-packages/pandas/core/generic.py:2171\u001b[39m, in \u001b[36mNDFrame.__array_ufunc__\u001b[39m\u001b[34m(self, ufunc, method, *inputs, **kwargs)\u001b[39m\n\u001b[32m 2167\u001b[39m \u001b[38;5;129m@final\u001b[39m\n\u001b[32m 2168\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__array_ufunc__\u001b[39m(\n\u001b[32m 2169\u001b[39m \u001b[38;5;28mself\u001b[39m, ufunc: np.ufunc, method: \u001b[38;5;28mstr\u001b[39m, *inputs: Any, **kwargs: Any\n\u001b[32m 2170\u001b[39m ):\n\u001b[32m-> \u001b[39m\u001b[32m2171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43marraylike\u001b[49m\u001b[43m.\u001b[49m\u001b[43marray_ufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mufunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/nix/store/grz7lgv2i6x8335n1pfl38x906vc2iby-python3-3.12.10-env/lib/python3.12/site-packages/pandas/core/arraylike.py:399\u001b[39m, in \u001b[36marray_ufunc\u001b[39m\u001b[34m(self, ufunc, method, *inputs, **kwargs)\u001b[39m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.ndim == \u001b[32m1\u001b[39m:\n\u001b[32m 397\u001b[39m \u001b[38;5;66;03m# ufunc(series, ...)\u001b[39;00m\n\u001b[32m 398\u001b[39m inputs = \u001b[38;5;28mtuple\u001b[39m(extract_array(x, extract_numpy=\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m inputs)\n\u001b[32m--> \u001b[39m\u001b[32m399\u001b[39m result = \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mufunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 400\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 401\u001b[39m \u001b[38;5;66;03m# ufunc(dataframe)\u001b[39;00m\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m method == \u001b[33m\"\u001b[39m\u001b[33m__call__\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwargs:\n\u001b[32m 403\u001b[39m \u001b[38;5;66;03m# for np.<ufunc>(..) calls\u001b[39;00m\n\u001b[32m 404\u001b[39m \u001b[38;5;66;03m# kwargs cannot necessarily be handled block-by-block, so only\u001b[39;00m\n\u001b[32m 405\u001b[39m \u001b[38;5;66;03m# take this path if there are no kwargs\u001b[39;00m\n",
"\u001b[31mTypeError\u001b[39m: loop of ufunc does not support argument 0 of type numpy.float64 which has no callable log method"
]
}
],
"source": [
"row = df.iloc[1, 1:]\n",
"print(row.isna().sum()) # How many NaNs?\n",
"print(row.dtypes) # Check Series dtype\n",
"print(type(row)) # Confirm it's a Series\n",
"np.log(row) # Try again\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "c60760bb-8195-4121-9db2-06e8e20a26c8",
"metadata": {
"scrolled": true
},
"outputs": [
{
"ename": "TypeError",
"evalue": "loop of ufunc does not support argument 0 of type numpy.float64 which has no callable log method",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mAttributeError\u001b[39m Traceback (most recent call last)",
"\u001b[31mAttributeError\u001b[39m: 'numpy.float64' object has no attribute 'log'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[45]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mnp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlog\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m.\u001b[49m\u001b[43miloc\u001b[49m\u001b[43m[\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mto_numpy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[31mTypeError\u001b[39m: loop of ufunc does not support argument 0 of type numpy.float64 which has no callable log method"
]
}
],
"source": [
"np.log(df.iloc[1,1:].to_numpy())"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "af1d56d3-ed2e-4328-8aaa-79565e12c777",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>0.5</th>\n",
" <th>1</th>\n",
" <th>1.5</th>\n",
" <th>2</th>\n",
" <th>2.5</th>\n",
" <th>3</th>\n",
" <th>3.5</th>\n",
" <th>4</th>\n",
" <th>4.5</th>\n",
" <th>...</th>\n",
" <th>35.5</th>\n",
" <th>36</th>\n",
" <th>36.5</th>\n",
" <th>37</th>\n",
" <th>37.5</th>\n",
" <th>38</th>\n",
" <th>38.5</th>\n",
" <th>39</th>\n",
" <th>39.5</th>\n",
" <th>40</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1970-01-31</td>\n",
" <td>NaN</td>\n",
" <td>8.635354</td>\n",
" <td>8.707430</td>\n",
" <td>8.700727</td>\n",
" <td>8.664049</td>\n",
" <td>8.618702</td>\n",
" <td>8.572477</td>\n",
" <td>8.528372</td>\n",
" <td>8.487617</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1970-02-28</td>\n",
" <td>NaN</td>\n",
" <td>8.413131</td>\n",
" <td>8.397269</td>\n",
" <td>8.370748</td>\n",
" <td>8.337633</td>\n",
" <td>8.301590</td>\n",
" <td>8.265403</td>\n",
" <td>8.230804</td>\n",
" <td>8.198713</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1970-03-31</td>\n",
" <td>NaN</td>\n",
" <td>7.744187</td>\n",
" <td>7.782761</td>\n",
" <td>7.795017</td>\n",
" <td>7.793104</td>\n",
" <td>7.784963</td>\n",
" <td>7.775288</td>\n",
" <td>7.766459</td>\n",
" <td>7.759564</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1970-04-30</td>\n",
" <td>NaN</td>\n",
" <td>7.606512</td>\n",
" <td>7.864352</td>\n",
" <td>7.973522</td>\n",
" <td>8.002442</td>\n",
" <td>7.992813</td>\n",
" <td>7.967524</td>\n",
" <td>7.938335</td>\n",
" <td>7.911422</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1970-05-31</td>\n",
" <td>NaN</td>\n",
" <td>7.391107</td>\n",
" <td>7.735838</td>\n",
" <td>7.862182</td>\n",
" <td>7.877510</td>\n",
" <td>7.840673</td>\n",
" <td>7.782249</td>\n",
" <td>7.718053</td>\n",
" <td>7.656856</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 81 columns</p>\n",
"</div>"
],
"text/plain": [
"0 Date 0.5 1 1.5 2 2.5 3 3.5 \\\n",
"0 1970-01-31 NaN 8.635354 8.707430 8.700727 8.664049 8.618702 8.572477 \n",
"1 1970-02-28 NaN 8.413131 8.397269 8.370748 8.337633 8.301590 8.265403 \n",
"2 1970-03-31 NaN 7.744187 7.782761 7.795017 7.793104 7.784963 7.775288 \n",
"3 1970-04-30 NaN 7.606512 7.864352 7.973522 8.002442 7.992813 7.967524 \n",
"4 1970-05-31 NaN 7.391107 7.735838 7.862182 7.877510 7.840673 7.782249 \n",
"\n",
"0 4 4.5 ... 35.5 36 36.5 37 37.5 38 38.5 39 39.5 40 \n",
"0 8.528372 8.487617 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"1 8.230804 8.198713 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"2 7.766459 7.759564 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"3 7.938335 7.911422 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"4 7.718053 7.656856 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
"[5 rows x 81 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30e4bd09-8995-48f3-9a35-2f7c1344b636",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "5f91cd8d-b3c6-487b-a64e-062493267579",
"metadata": {},
"source": [
"## Looking at Dataframes in Spreadsheet"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5f75258-e274-42ed-94d5-b700e196d448",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b05417cf-594a-49cb-a687-c3c19f43f80f",
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"\n",
"# Frames to dump for inspection, one worksheet per frame (sheet name == variable name).\n",
"SHEET_NAMES = [\n",
"    \"df1\", \"df2\", \"df\", \"df_numeric\", \"df_converted\",\n",
"    \"mask_problem\", \"filtered\", \"col_names\", \"col_names2\",\n",
"]\n",
"\n",
"# Look the frames up by name so that one which has not been created in this kernel\n",
"# session is reported and skipped instead of aborting the export with a NameError.\n",
"available = {name: globals()[name] for name in SHEET_NAMES if name in globals()}\n",
"missing = [name for name in SHEET_NAMES if name not in available]\n",
"if missing:\n",
"    print(\"Skipping undefined frames:\", \", \".join(missing))\n",
"\n",
"if available:\n",
"    with pd.ExcelWriter(\"output.xlsx\", engine=\"openpyxl\") as writer:\n",
"        for name, frame in available.items():\n",
"            frame.to_excel(writer, sheet_name=name, index=False)\n",
"\n",
"    # Open the workbook without blocking the kernel. Popen with an argument list needs\n",
"    # no shell, so the bash-only `disown` builtin is no longer required.\n",
"    try:\n",
"        subprocess.Popen([\"libreoffice\", \"--calc\", \"output.xlsx\"], start_new_session=True)\n",
"    except FileNotFoundError:\n",
"        print(\"libreoffice not found on PATH; open output.xlsx manually.\")\n",
"else:\n",
"    print(\"No frames available to export.\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18d96fed-f939-4051-87ce-3053e8cd043e",
"metadata": {},
"outputs": [],
"source": [
"# Row 1 without the Date column, coerced to float. The raw slice appears to be an\n",
"# object array, and comparing its NaN entries is what produced the\n",
"# \"invalid value encountered in less_equal\" RuntimeWarning in the earlier run.\n",
"row = pd.to_numeric(df.iloc[1, 1:], errors=\"coerce\").to_numpy()\n",
"observed = row[~np.isnan(row)]  # only the maturities that actually have a yield\n",
"print(\"Min:\", np.nanmin(row))  # nanmin ignores NaN\n",
"# np.log returns NaN (negative input) or -inf (zero) for non-positive values.\n",
"print(\"Any <= 0:\", np.any(observed <= 0))\n"
]
},
{
"cell_type": "markdown",
"id": "106c408f-a80a-4e9f-a5fc-b3f20535bcf5",
"metadata": {},
"source": [
"## a check on the log calculation\n",
"\n",
"The sum of the logs equals the log of the product:\n",
"\n",
"$$\\sum_i \\log(x_i) = \\log\\left(\\prod_i x_i\\right)$$"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "ce0dc267-c312-49c2-81da-ec6ac725af08",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[]\n"
]
}
],
"source": [
"# Collect every entry of row 1 (Date column excluded) that is not a float and show the list.\n",
"row = df.iloc[1, 1:]\n",
"bad_values = list(filter(lambda v: not isinstance(v, (float, np.float64)), row))\n",
"print(bad_values)\n"
]
},
{
"cell_type": "markdown",
"id": "120fd02d-7d10-49f0-bbcd-acd123f2a2ed",
"metadata": {},
"source": [
"Per-element check (kept for reference, not executed):\n",
"\n",
"```python\n",
"for i, val in enumerate(row):\n",
"    try:\n",
"        np.log(val)\n",
"    except Exception as e:\n",
"        print(f\"Column {row.index[i]}: value={val!r} -> {type(val)} caused error: {e}\")\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad91eb76-6862-4c6a-b7ef-b38a2edf888d",
"metadata": {},
"outputs": [],
"source": [
"# Demonstration: np.log fails on the raw row slice (an earlier run raised\n",
"# \"loop of ufunc does not support argument 0 of type numpy.float64 ...\").\n",
"# The error is caught and printed so that Restart & Run All continues past this cell.\n",
"try:\n",
"    logged_row = np.log(df.iloc[1, 1:].reset_index(drop=True))\n",
"except TypeError as exc:\n",
"    print(f\"{type(exc).__name__}: {exc}\")\n",
"else:\n",
"    # No error (e.g. the row is already numeric): show the result as before.\n",
"    display(logged_row)\n"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "aaa05503-8b20-440c-899a-762767ad34c8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ nan, 2.12979375, 2.12790656, 2.12474326, 2.12077932,\n",
" 2.11644705, 2.11207845, 2.10788375, 2.10397718, 2.100422 ,\n",
" 2.09724983, 2.09447212, 2.09208754, 2.09008667, 2.08845794,\n",
" 2.08719092, 2.0862761 , 2.08570455, 2.08546766, 2.08555702,\n",
" 2.08596432, 2.08668129, 2.0876996 , 2.08901092, 2.09060682,\n",
" 2.09247882, 2.09461838, 2.09701686, 2.09966558, 2.10255582,\n",
" 2.1056788 , 2.10902572, 2.11258776, 2.1163561 , 2.12032194,\n",
" 2.12447648, 2.12881098, 2.13331673, 2.13798508, 2.14280746,\n",
" 2.14777538, 2.15288042, 2.15811429, 2.16346878, 2.1689358 ,\n",
" 2.17450739, 2.18017571, 2.18593307, 2.19177198, 2.19768531,\n",
" nan, nan, nan, nan, nan,\n",
" nan, nan, nan, nan, nan,\n",
" nan, nan, nan, nan, nan,\n",
" nan, nan, nan, nan, nan,\n",
" nan, nan, nan, nan, nan,\n",
" nan, nan, nan, nan, nan])"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Coerce row 1 (Date column excluded) to numeric, turning non-numeric entries into NaN,\n",
"# then take natural logs of the underlying NumPy array.\n",
"np.log(\n",
"    pd.to_numeric(df.iloc[1, 1:], errors='coerce').values\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "7865dae7-f222-42ad-87d4-ffc332c93e4b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/nix/store/grz7lgv2i6x8335n1pfl38x906vc2iby-python3-3.12.10-env/lib/python3.12/site-packages/pandas/core/internals/blocks.py:393: RuntimeWarning: invalid value encountered in log\n",
" result = func(self.values, **kwargs)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0.5</th>\n",
" <th>1.0</th>\n",
" <th>1.5</th>\n",
" <th>2.0</th>\n",
" <th>2.5</th>\n",
" <th>3.0</th>\n",
" <th>3.5</th>\n",
" <th>4.0</th>\n",
" <th>4.5</th>\n",
" <th>5.0</th>\n",
" <th>...</th>\n",
" <th>35.5</th>\n",
" <th>36.0</th>\n",
" <th>36.5</th>\n",
" <th>37.0</th>\n",
" <th>37.5</th>\n",
" <th>38.0</th>\n",
" <th>38.5</th>\n",
" <th>39.0</th>\n",
" <th>39.5</th>\n",
" <th>40.0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NaN</td>\n",
" <td>2.155865</td>\n",
" <td>2.164177</td>\n",
" <td>2.163407</td>\n",
" <td>2.159182</td>\n",
" <td>2.153934</td>\n",
" <td>2.148557</td>\n",
" <td>2.143398</td>\n",
" <td>2.138608</td>\n",
" <td>2.134239</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NaN</td>\n",
" <td>2.129794</td>\n",
" <td>2.127907</td>\n",
" <td>2.124743</td>\n",
" <td>2.120779</td>\n",
" <td>2.116447</td>\n",
" <td>2.112078</td>\n",
" <td>2.107884</td>\n",
" <td>2.103977</td>\n",
" <td>2.100422</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NaN</td>\n",
" <td>2.046943</td>\n",
" <td>2.051911</td>\n",
" <td>2.053485</td>\n",
" <td>2.053239</td>\n",
" <td>2.052194</td>\n",
" <td>2.050950</td>\n",
" <td>2.049814</td>\n",
" <td>2.048926</td>\n",
" <td>2.048347</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>2.029005</td>\n",
" <td>2.062340</td>\n",
" <td>2.076126</td>\n",
" <td>2.079747</td>\n",
" <td>2.078543</td>\n",
" <td>2.075374</td>\n",
" <td>2.071704</td>\n",
" <td>2.068308</td>\n",
" <td>2.065603</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NaN</td>\n",
" <td>2.000277</td>\n",
" <td>2.045864</td>\n",
" <td>2.062064</td>\n",
" <td>2.064012</td>\n",
" <td>2.059325</td>\n",
" <td>2.051845</td>\n",
" <td>2.043562</td>\n",
" <td>2.035601</td>\n",
" <td>2.028615</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>655</th>\n",
" <td>1.455826</td>\n",
" <td>1.439353</td>\n",
" <td>1.402119</td>\n",
" <td>1.375484</td>\n",
" <td>1.356925</td>\n",
" <td>1.343494</td>\n",
" <td>1.333993</td>\n",
" <td>1.327756</td>\n",
" <td>1.324277</td>\n",
" <td>1.323134</td>\n",
" <td>...</td>\n",
" <td>1.499410</td>\n",
" <td>1.497142</td>\n",
" <td>1.494746</td>\n",
" <td>1.492218</td>\n",
" <td>1.489558</td>\n",
" <td>1.486763</td>\n",
" <td>1.483831</td>\n",
" <td>1.480760</td>\n",
" <td>1.477550</td>\n",
" <td>1.474197</td>\n",
" </tr>\n",
" <tr>\n",
" <th>656</th>\n",
" <td>1.477274</td>\n",
" <td>1.425788</td>\n",
" <td>1.376319</td>\n",
" <td>1.346838</td>\n",
" <td>1.329441</td>\n",
" <td>1.319028</td>\n",
" <td>1.313292</td>\n",
" <td>1.310951</td>\n",
" <td>1.311175</td>\n",
" <td>1.313381</td>\n",
" <td>...</td>\n",
" <td>1.515755</td>\n",
" <td>1.513311</td>\n",
" <td>1.510736</td>\n",
" <td>1.508030</td>\n",
" <td>1.505189</td>\n",
" <td>1.502213</td>\n",
" <td>1.499099</td>\n",
" <td>1.495847</td>\n",
" <td>1.492453</td>\n",
" <td>1.488918</td>\n",
" </tr>\n",
" <tr>\n",
" <th>657</th>\n",
" <td>1.509063</td>\n",
" <td>1.479084</td>\n",
" <td>1.457223</td>\n",
" <td>1.445040</td>\n",
" <td>1.436732</td>\n",
" <td>1.430826</td>\n",
" <td>1.427094</td>\n",
" <td>1.425345</td>\n",
" <td>1.425352</td>\n",
" <td>1.426876</td>\n",
" <td>...</td>\n",
" <td>1.562482</td>\n",
" <td>1.559861</td>\n",
" <td>1.557129</td>\n",
" <td>1.554288</td>\n",
" <td>1.551334</td>\n",
" <td>1.548267</td>\n",
" <td>1.545084</td>\n",
" <td>1.541785</td>\n",
" <td>1.538368</td>\n",
" <td>1.534832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658</th>\n",
" <td>1.480481</td>\n",
" <td>1.439974</td>\n",
" <td>1.416794</td>\n",
" <td>1.403691</td>\n",
" <td>1.394121</td>\n",
" <td>1.386840</td>\n",
" <td>1.381801</td>\n",
" <td>1.378954</td>\n",
" <td>1.378166</td>\n",
" <td>1.379234</td>\n",
" <td>...</td>\n",
" <td>1.543082</td>\n",
" <td>1.540410</td>\n",
" <td>1.537605</td>\n",
" <td>1.534664</td>\n",
" <td>1.531586</td>\n",
" <td>1.528368</td>\n",
" <td>1.525007</td>\n",
" <td>1.521501</td>\n",
" <td>1.517847</td>\n",
" <td>1.514042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>659</th>\n",
" <td>1.494679</td>\n",
" <td>1.460397</td>\n",
" <td>1.441731</td>\n",
" <td>1.434688</td>\n",
" <td>1.431337</td>\n",
" <td>1.429968</td>\n",
" <td>1.430298</td>\n",
" <td>1.432190</td>\n",
" <td>1.435510</td>\n",
" <td>1.440097</td>\n",
" <td>...</td>\n",
" <td>1.617167</td>\n",
" <td>1.614502</td>\n",
" <td>1.611717</td>\n",
" <td>1.608811</td>\n",
" <td>1.605781</td>\n",
" <td>1.602625</td>\n",
" <td>1.599343</td>\n",
" <td>1.595930</td>\n",
" <td>1.592387</td>\n",
" <td>1.588710</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>660 rows × 80 columns</p>\n",
"</div>"
],
"text/plain": [
"0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 \\\n",
"0 NaN 2.155865 2.164177 2.163407 2.159182 2.153934 2.148557 \n",
"1 NaN 2.129794 2.127907 2.124743 2.120779 2.116447 2.112078 \n",
"2 NaN 2.046943 2.051911 2.053485 2.053239 2.052194 2.050950 \n",
"3 NaN 2.029005 2.062340 2.076126 2.079747 2.078543 2.075374 \n",
"4 NaN 2.000277 2.045864 2.062064 2.064012 2.059325 2.051845 \n",
".. ... ... ... ... ... ... ... \n",
"655 1.455826 1.439353 1.402119 1.375484 1.356925 1.343494 1.333993 \n",
"656 1.477274 1.425788 1.376319 1.346838 1.329441 1.319028 1.313292 \n",
"657 1.509063 1.479084 1.457223 1.445040 1.436732 1.430826 1.427094 \n",
"658 1.480481 1.439974 1.416794 1.403691 1.394121 1.386840 1.381801 \n",
"659 1.494679 1.460397 1.441731 1.434688 1.431337 1.429968 1.430298 \n",
"\n",
"0 4.0 4.5 5.0 ... 35.5 36.0 36.5 \\\n",
"0 2.143398 2.138608 2.134239 ... NaN NaN NaN \n",
"1 2.107884 2.103977 2.100422 ... NaN NaN NaN \n",
"2 2.049814 2.048926 2.048347 ... NaN NaN NaN \n",
"3 2.071704 2.068308 2.065603 ... NaN NaN NaN \n",
"4 2.043562 2.035601 2.028615 ... NaN NaN NaN \n",
".. ... ... ... ... ... ... ... \n",
"655 1.327756 1.324277 1.323134 ... 1.499410 1.497142 1.494746 \n",
"656 1.310951 1.311175 1.313381 ... 1.515755 1.513311 1.510736 \n",
"657 1.425345 1.425352 1.426876 ... 1.562482 1.559861 1.557129 \n",
"658 1.378954 1.378166 1.379234 ... 1.543082 1.540410 1.537605 \n",
"659 1.432190 1.435510 1.440097 ... 1.617167 1.614502 1.611717 \n",
"\n",
"0 37.0 37.5 38.0 38.5 39.0 39.5 40.0 \n",
"0 NaN NaN NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN NaN \n",
".. ... ... ... ... ... ... ... \n",
"655 1.492218 1.489558 1.486763 1.483831 1.480760 1.477550 1.474197 \n",
"656 1.508030 1.505189 1.502213 1.499099 1.495847 1.492453 1.488918 \n",
"657 1.554288 1.551334 1.548267 1.545084 1.541785 1.538368 1.534832 \n",
"658 1.534664 1.531586 1.528368 1.525007 1.521501 1.517847 1.514042 \n",
"659 1.608811 1.605781 1.602625 1.599343 1.595930 1.592387 1.588710 \n",
"\n",
"[660 rows x 80 columns]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Flatten the maturity columns into one long Series, coerce it to numeric\n",
"# (non-numeric -> NaN), restore the row x column layout, then take natural logs.\n",
"# NOTE(review): the recorded run warned \"invalid value encountered in log\", which\n",
"# suggests some yields are negative and come back as NaN -- confirm before relying on it.\n",
"np.log(\n",
"    pd.to_numeric(\n",
"        df.iloc[:, 1:].stack(),\n",
"        errors='coerce',\n",
"    ).unstack()\n",
")\n"
]
},
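{
"cell_type": "markdown",
"id": "9ed6bbaf-99c2-4128-b37a-ea855e6c63de",
"metadata": {},
"source": [
"Why the two cells above work where the plain `np.log` call failed:\n",
"\n",
"- `pd.to_numeric(..., errors='coerce')` converts the values to a numeric dtype and turns anything non-numeric into `NaN`.\n",
"- `.values` hands `np.log` the underlying NumPy array; `.stack()` / `.unstack()` flatten the frame into a single Series for `pd.to_numeric` and then restore the original row × column layout.\n",
"- `np.log(...)` then applies element-wise without raising."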
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}