diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | python-trr265.spec | 1271 | ||||
-rw-r--r-- | sources | 1 |
3 files changed, 1273 insertions, 0 deletions
@@ -0,0 +1 @@ +/trr265-0.0.10.tar.gz diff --git a/python-trr265.spec b/python-trr265.spec new file mode 100644 index 0000000..8045695 --- /dev/null +++ b/python-trr265.spec @@ -0,0 +1,1271 @@ +%global _empty_manifest_terminate_build 0 +Name: python-trr265 +Version: 0.0.10 +Release: 1 +Summary: The TRR 265 analysis pipeline. +License: Apache Software License 2.0 +URL: https://github.com/hgzech/trr265/tree/master/ +Source0: https://mirrors.nju.edu.cn/pypi/web/packages/8a/46/2ef06d6533e1645d9360c4c2fb86b4040b238e30808da1fa57d88d820769/trr265-0.0.10.tar.gz +BuildArch: noarch + +Requires: python3-pip +Requires: python3-packaging +Requires: python3-bs4 +Requires: python3-pandas +Requires: python3-numpy +Requires: python3-pyarrow +Requires: python3-openpyxl +Requires: python3-lxml +Requires: python3-matplotlib +Requires: python3-seaborn +Requires: python3-fastcore + +%description +# TRR 265 +> This module handles analysis of the TRR265 data. + + +## Install + +`pip install trr265` + +`pip install biuR` (optional but needed for most analyses) + +## How to use + +```python +from pygments.formatters import HtmlFormatter +from pygments import highlight +import IPython +import inspect +from pygments.lexers import PythonLexer + + +def display_function(the_function): + formatter = HtmlFormatter() + return IPython.display.HTML('<style type="text/css">{}</style>{}'.format( + formatter.get_style_defs('.highlight'), + highlight(inspect.getsource(the_function), PythonLexer(), formatter))) +``` + +```python +display_function(dp.get_mov_data) +``` + + + + +<style type="text/css">pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; 
padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #408080; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #BC7A00 } /* Comment.Preproc */ +.highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #B00040 } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { 
color: #7D9029 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #A0A000 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #666666 } /* Literal.Number.Bin */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #BB6688 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { 
color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #0000FF } /* Name.Function.Magic */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .vm { color: #19177C } /* Name.Variable.Magic */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class="highlight"><pre><span></span><span class="nd">@patch</span> +<span class="nd">@get_efficiently</span> +<span class="k">def</span> <span class="nf">get_mov_data</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span><span class="n">DataProvider</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> This function gets Movisense data</span> +<span class="sd"> 1) We create unique participnat IDs (e.g. "b001"; this is necessary as sites use overapping IDs)</span> +<span class="sd"> 2) We merge double IDs, so participants with two IDs only have one (for this duplicate_ids.csv has to be updated)</span> +<span class="sd"> 3) We remove pilot participants</span> +<span class="sd"> 4) We get starting dates (from the participant overviews in movisense; downloaded as html)</span> +<span class="sd"> 5) We calculate sampling days and end dates based on the starting dates</span> +<span class="sd"> """</span> + <span class="c1"># Loading raw data</span> + <span class="n">mov_berlin</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_berlin_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + <span class="n">mov_dresden</span> <span class="o">=</span> <span class="n">pd</span><span 
class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_dresden_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + <span class="n">mov_mannheim</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_mannheim_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + + <span class="c1"># Merging (participant numbers repeat so we add the first letter of location)</span> + <span class="n">mov_berlin</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'berlin'</span> + <span class="n">mov_dresden</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'dresden'</span> + <span class="n">mov_mannheim</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'mannheim'</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">mov_berlin</span><span class="p">,</span><span class="n">mov_dresden</span><span class="p">,</span><span class="n">mov_mannheim</span><span class="p">])</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span><span class="o">.</span><span class="n">str</span><span class="p">[</span><span 
class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">Participant</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="s1">'</span><span class="si">%03d</span><span class="s1">'</span><span class="o">%</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> + <span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span> <span class="o">=</span> <span class="s1">'Participant'</span><span class="p">,</span> <span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="c1"># Dropping old participant column to avoid mistakes</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'trigger_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">Trigger_date</span> <span class="o">+</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">Trigger_time</span><span class="p">)</span> + + <span class="c1"># Merging double IDs (for participants with several movisense IDs)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_duplicate_mov_ids</span><span class="p">())</span> + + <span class="c1"># Removing pilot 
participants</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="o">~</span><span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">contains</span><span class="p">(</span><span class="s1">'test'</span><span class="p">)]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="o">~</span><span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="s1">'m157'</span><span class="p">,</span> <span class="s1">'b010'</span><span class="p">,</span> <span class="s1">'b006'</span><span class="p">,</span> <span class="s1">'d001'</span><span class="p">,</span> <span class="s1">'d002'</span><span class="p">,</span> <span class="s1">'d042'</span><span class="p">,</span> <span class="s1">'m024'</span><span class="p">,</span> <span class="s1">'m028'</span><span class="p">,</span> <span class="s1">'m071'</span><span class="p">,</span> <span class="s1">'m079'</span><span class="p">,</span> <span class="s1">'m107'</span><span class="p">])]</span> + + + <span class="c1"># Adding starting dates to get sampling days</span> + <span class="k">def</span> <span class="nf">get_starting_dates</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">pp_prefix</span> <span class="o">=</span> <span class="s1">''</span><span class="p">):</span> + <span class="n">soup</span> <span class="o">=</span> <span class="n">bs</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span 
class="o">.</span><span class="n">read</span><span class="p">())</span> + <span class="n">ids</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"td"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'simpleId'</span><span class="p">)]</span> + <span class="n">c_dates</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"span"</span><span class="p">)[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'title'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"td"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'coupleDate'</span><span class="p">)]</span> + <span class="n">s_dates</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"input"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'dp startDate'</span><span class="p">)]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span 
class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">'participant'</span><span class="p">:</span><span class="n">ids</span><span class="p">,</span><span class="s1">'coupling_date'</span><span class="p">:</span><span class="n">c_dates</span><span class="p">,</span><span class="s1">'starting_date'</span><span class="p">:</span><span class="n">s_dates</span><span class="p">})</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'coupling_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">coupling_date</span><span class="p">)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'starting_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">starting_date</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">starting_date</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">coupling_date</span><span class="p">,</span><span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pp_prefix</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="s1">'</span><span 
class="si">%03d</span><span class="s1">'</span><span class="o">%</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> + <span class="k">return</span> <span class="n">df</span> + + <span class="n">starting_dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_berlin_starting_dates_path</span><span class="p">,</span> <span class="s1">'b'</span><span class="p">),</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_dresden_starting_dates_path</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">),</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_mannheim_starting_dates_path</span><span class="p">,</span> <span class="s1">'m'</span><span class="p">)</span> + <span class="p">])</span> + <span class="c1"># For participants with several movisense IDs we use the first coupling date</span> + <span class="n">starting_dates</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_duplicate_mov_ids</span><span class="p">(),</span> <span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="n">starting_dates</span> <span class="o">=</span> <span class="n">starting_dates</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'participant'</span><span class="p">)[[</span><span class="s1">'starting_date'</span><span 
class="p">,</span><span class="s1">'coupling_date'</span><span class="p">]]</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="o">.</span><span class="n">reset_index</span><span class="p">()</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">starting_dates</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s2">"participant"</span><span class="p">,</span> <span class="n">how</span> <span class="o">=</span> <span class="s1">'left'</span><span class="p">,</span> <span class="n">indicator</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="c1"># Checking if starting dates were downloaded</span> + <span class="k">if</span> <span class="s2">"left_only"</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">_merge</span><span class="o">.</span><span class="n">unique</span><span class="p">():</span> + <span class="n">no_starting_dates</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">'_merge == "left_only"'</span><span class="p">)</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"Starting dates missing for participants below. 
Did you download the participant overviews as html?"</span><span class="p">,</span> <span class="n">no_starting_dates</span><span class="p">)</span> + <span class="c1"># Calculating movisense sampling day, adding date and end_date</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'sampling_day'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'trigger_date'</span><span class="p">]</span> <span class="o">-</span> <span class="n">df</span><span class="p">[</span><span class="s1">'starting_date'</span><span class="p">])</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">days</span> <span class="o">+</span> <span class="mi">1</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">trigger_date</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">date</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'end_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">date</span> <span class="o">+</span> <span class="n">pd</span><span class="o">.</span><span class="n">DateOffset</span><span class="p">(</span><span class="n">days</span> <span class="o">=</span> <span class="mi">365</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="s1">'mov_index'</span><span class="p">,</span><span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="c1"># Adding redcap IDs</span> + <span class="n">ids_table</span> <span class="o">=</span> 
<span class="bp">self</span><span class="o">.</span><span class="n">get_ba_data</span><span class="p">()[[</span><span class="s1">'participant_id'</span><span class="p">,</span><span class="s1">'mov_id'</span><span class="p">]]</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">'mov_id==mov_id'</span><span class="p">)</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'mov_id'</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span> + <span class="n">ids_table</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'redcap_id'</span><span class="p">]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">ids_table</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s1">'participant'</span><span class="p">,</span> <span class="n">right_index</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">how</span> <span class="o">=</span> <span class="s1">'left'</span><span class="p">)</span> + <span class="c1"># Filtering out participants with no associated redcap data</span> + <span class="n">no_redcap</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"redcap_id.isna()"</span><span class="p">)</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"Participants: </span><span class="si">%s</span><span class="s2"> have no associated redcap IDs and are excluded from the following 
analyses."</span><span class="o">%</span><span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">no_redcap</span><span class="p">))</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">redcap_id</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span><span class="o">==</span><span class="kc">False</span><span class="p">]</span> + <span class="k">return</span> <span class="n">df</span> +</pre></div> + + + + +```python +#%load_ext autoreload +#%autoreload 2 +from trr265.data_provider import DataProvider +dp = DataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/') # Path to data folder (containing raw, interim, external, and processed) +dp.get_two_day_data().iloc[:20][['participant','date','MDBF_zufrieden','g_alc']] +``` + + + + +<div> +<style scoped> + .dataframe tbody tr th:only-of-type { + vertical-align: middle; + } + + .dataframe tbody tr th { + vertical-align: top; + } + + .dataframe thead th { + text-align: right; + } +</style> +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>participant</th> + <th>date</th> + <th>MDBF_zufrieden</th> + <th>g_alc</th> + </tr> + <tr> + <th>two_day_index</th> + <th></th> + <th></th> + <th></th> + <th></th> + </tr> + </thead> + <tbody> + <tr> + <th>0</th> + <td>b001</td> + <td>2020-02-22</td> + <td>NaN</td> + <td>6.4</td> + </tr> + <tr> + <th>1</th> + <td>b001</td> + <td>2020-02-23</td> + <td>NaN</td> + <td>35.2</td> + </tr> + <tr> + <th>2</th> + <td>b001</td> + <td>2020-02-24</td> + <td>2.0</td> + <td>NaN</td> + </tr> + <tr> + <th>3</th> + <td>b001</td> + <td>2020-02-25</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>4</th> + <td>b001</td> + <td>2020-02-26</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>5</th> + <td>b001</td> + <td>2020-02-27</td> + 
<td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>6</th> + <td>b001</td> + <td>2020-02-28</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>7</th> + <td>b001</td> + <td>2020-02-29</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>8</th> + <td>b001</td> + <td>2020-03-01</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>9</th> + <td>b001</td> + <td>2020-03-02</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>10</th> + <td>b001</td> + <td>2020-03-03</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>11</th> + <td>b001</td> + <td>2020-03-04</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>12</th> + <td>b001</td> + <td>2020-03-05</td> + <td>NaN</td> + <td>0.0</td> + </tr> + <tr> + <th>13</th> + <td>b001</td> + <td>2020-03-06</td> + <td>NaN</td> + <td>57.6</td> + </tr> + <tr> + <th>14</th> + <td>b001</td> + <td>2020-03-07</td> + <td>3.0</td> + <td>NaN</td> + </tr> + <tr> + <th>15</th> + <td>b001</td> + <td>2020-03-08</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>16</th> + <td>b001</td> + <td>2020-03-09</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>17</th> + <td>b001</td> + <td>2020-03-10</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>18</th> + <td>b001</td> + <td>2020-03-11</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>19</th> + <td>b001</td> + <td>2020-03-12</td> + <td>NaN</td> + <td>NaN</td> + </tr> + </tbody> +</table> +</div> + + + +## Required data + +### Phone screening +- data/external/b7_participants.xlsx <- from Hilmar +- data/raw/phonescreening.csv <- from redcap +- data/external/phone_codebook.html <- from redcap + +### Basic assessment (from redcap) +- data/raw/ba.csv <- from redcap +- data/external/ba_codebook.html <- from redcap + +### Movisens +- all basic assessment data (see above) +- data/raw/mov_data_b.csv +- data/raw/mov_data_d.csv +- data/raw/mov_data_m.csv +- data/raw/starting_dates_b.csv +- data/raw/starting_dates_d.csv +- data/raw/starting_dates_m.csv +- 
data/external/alcohol_per_drink.csv <- from Hilmar + + + + +%package -n python3-trr265 +Summary: The TRR 265 analysis pipeline. +Provides: python-trr265 +BuildRequires: python3-devel +BuildRequires: python3-setuptools +BuildRequires: python3-pip +%description -n python3-trr265 +# TRR 265 +> This module handles analysis of the TRR265 data. + + +## Install + +`pip install trr265` + +`pip install biuR` (optional but needed for most analyses) + +## How to use + +```python +from pygments.formatters import HtmlFormatter +from pygments import highlight +import IPython +import inspect +from pygments.lexers import PythonLexer + + +def display_function(the_function): + formatter = HtmlFormatter() + return IPython.display.HTML('<style type="text/css">{}</style>{}'.format( + formatter.get_style_defs('.highlight'), + highlight(inspect.getsource(the_function), PythonLexer(), formatter))) +``` + +```python +display_function(dp.get_mov_data) +``` + + + + +<style type="text/css">pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #408080; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #BC7A00 } /* Comment.Preproc */ +.highlight .cpf { color: 
#408080; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #B00040 } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { color: #7D9029 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #A0A000 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ 
+.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #666666 } /* Literal.Number.Bin */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #BB6688 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #0000FF } /* Name.Function.Magic */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .vm { color: #19177C } /* Name.Variable.Magic */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class="highlight"><pre><span></span><span class="nd">@patch</span> +<span class="nd">@get_efficiently</span> 
+<span class="k">def</span> <span class="nf">get_mov_data</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span><span class="n">DataProvider</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> This function gets Movisense data</span> +<span class="sd"> 1) We create unique participant IDs (e.g. "b001"; this is necessary as sites use overlapping IDs)</span> +<span class="sd"> 2) We merge double IDs, so participants with two IDs only have one (for this duplicate_ids.csv has to be updated)</span> +<span class="sd"> 3) We remove pilot participants</span> +<span class="sd"> 4) We get starting dates (from the participant overviews in movisense; downloaded as html)</span> +<span class="sd"> 5) We calculate sampling days and end dates based on the starting dates</span> +<span class="sd"> """</span> + <span class="c1"># Loading raw data</span> + <span class="n">mov_berlin</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_berlin_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + <span class="n">mov_dresden</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_dresden_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + <span class="n">mov_mannheim</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_mannheim_path</span><span class="p">,</span> <span class="n">sep</span> <span
class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + + <span class="c1"># Merging (participant numbers repeat so we add the first letter of location)</span> + <span class="n">mov_berlin</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'berlin'</span> + <span class="n">mov_dresden</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'dresden'</span> + <span class="n">mov_mannheim</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'mannheim'</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">mov_berlin</span><span class="p">,</span><span class="n">mov_dresden</span><span class="p">,</span><span class="n">mov_mannheim</span><span class="p">])</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span><span class="o">.</span><span class="n">str</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">Participant</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="s1">'</span><span class="si">%03d</span><span class="s1">'</span><span class="o">%</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> + <span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span 
class="n">columns</span> <span class="o">=</span> <span class="s1">'Participant'</span><span class="p">,</span> <span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="c1"># Dropping old participant column to avoid mistakes</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'trigger_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">Trigger_date</span> <span class="o">+</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">Trigger_time</span><span class="p">)</span> + + <span class="c1"># Merging double IDs (for participants with several movisense IDs)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_duplicate_mov_ids</span><span class="p">())</span> + + <span class="c1"># Removing pilot participants</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="o">~</span><span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">contains</span><span class="p">(</span><span class="s1">'test'</span><span class="p">)]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span 
class="p">[</span><span class="o">~</span><span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="s1">'m157'</span><span class="p">,</span> <span class="s1">'b010'</span><span class="p">,</span> <span class="s1">'b006'</span><span class="p">,</span> <span class="s1">'d001'</span><span class="p">,</span> <span class="s1">'d002'</span><span class="p">,</span> <span class="s1">'d042'</span><span class="p">,</span> <span class="s1">'m024'</span><span class="p">,</span> <span class="s1">'m028'</span><span class="p">,</span> <span class="s1">'m071'</span><span class="p">,</span> <span class="s1">'m079'</span><span class="p">,</span> <span class="s1">'m107'</span><span class="p">])]</span> + + + <span class="c1"># Adding starting dates to get sampling days</span> + <span class="k">def</span> <span class="nf">get_starting_dates</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">pp_prefix</span> <span class="o">=</span> <span class="s1">''</span><span class="p">):</span> + <span class="n">soup</span> <span class="o">=</span> <span class="n">bs</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span> + <span class="n">ids</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"td"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'simpleId'</span><span 
class="p">)]</span> + <span class="n">c_dates</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"span"</span><span class="p">)[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'title'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"td"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'coupleDate'</span><span class="p">)]</span> + <span class="n">s_dates</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"input"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'dp startDate'</span><span class="p">)]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">'participant'</span><span class="p">:</span><span class="n">ids</span><span class="p">,</span><span class="s1">'coupling_date'</span><span class="p">:</span><span class="n">c_dates</span><span class="p">,</span><span class="s1">'starting_date'</span><span class="p">:</span><span class="n">s_dates</span><span class="p">})</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'coupling_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span 
class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">coupling_date</span><span class="p">)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'starting_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">starting_date</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">starting_date</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">coupling_date</span><span class="p">,</span><span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pp_prefix</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="s1">'</span><span class="si">%03d</span><span class="s1">'</span><span class="o">%</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> + <span class="k">return</span> <span class="n">df</span> + + <span class="n">starting_dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_berlin_starting_dates_path</span><span class="p">,</span> <span class="s1">'b'</span><span 
class="p">),</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_dresden_starting_dates_path</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">),</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_mannheim_starting_dates_path</span><span class="p">,</span> <span class="s1">'m'</span><span class="p">)</span> + <span class="p">])</span> + <span class="c1"># For participants with several movisense IDs we use the first coupling date</span> + <span class="n">starting_dates</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_duplicate_mov_ids</span><span class="p">(),</span> <span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="n">starting_dates</span> <span class="o">=</span> <span class="n">starting_dates</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'participant'</span><span class="p">)[[</span><span class="s1">'starting_date'</span><span class="p">,</span><span class="s1">'coupling_date'</span><span class="p">]]</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="o">.</span><span class="n">reset_index</span><span class="p">()</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">starting_dates</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s2">"participant"</span><span class="p">,</span> <span class="n">how</span> <span class="o">=</span> <span 
class="s1">'left'</span><span class="p">,</span> <span class="n">indicator</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="c1"># Checking if starting dates were downloaded</span> + <span class="k">if</span> <span class="s2">"left_only"</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">_merge</span><span class="o">.</span><span class="n">unique</span><span class="p">():</span> + <span class="n">no_starting_dates</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">'_merge == "left_only"'</span><span class="p">)</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"Starting dates missing for participants below. Did you download the participant overviews as html?"</span><span class="p">,</span> <span class="n">no_starting_dates</span><span class="p">)</span> + <span class="c1"># Calculating movisense sampling day, adding date and end_date</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'sampling_day'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'trigger_date'</span><span class="p">]</span> <span class="o">-</span> <span class="n">df</span><span class="p">[</span><span class="s1">'starting_date'</span><span class="p">])</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">days</span> <span class="o">+</span> <span class="mi">1</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span 
class="n">trigger_date</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">date</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'end_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">date</span> <span class="o">+</span> <span class="n">pd</span><span class="o">.</span><span class="n">DateOffset</span><span class="p">(</span><span class="n">days</span> <span class="o">=</span> <span class="mi">365</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="s1">'mov_index'</span><span class="p">,</span><span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="c1"># Adding redcap IDs</span> + <span class="n">ids_table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_ba_data</span><span class="p">()[[</span><span class="s1">'participant_id'</span><span class="p">,</span><span class="s1">'mov_id'</span><span class="p">]]</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">'mov_id==mov_id'</span><span class="p">)</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'mov_id'</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span> + <span class="n">ids_table</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'redcap_id'</span><span class="p">]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">ids_table</span><span 
class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s1">'participant'</span><span class="p">,</span> <span class="n">right_index</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">how</span> <span class="o">=</span> <span class="s1">'left'</span><span class="p">)</span> + <span class="c1"># Filtering out participants with no associated redcap data</span> + <span class="n">no_redcap</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"redcap_id.isna()"</span><span class="p">)</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"Participants: </span><span class="si">%s</span><span class="s2"> have no associated redcap IDs and are excluded from the following analyses."</span><span class="o">%</span><span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">no_redcap</span><span class="p">))</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">redcap_id</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span><span class="o">==</span><span class="kc">False</span><span class="p">]</span> + <span class="k">return</span> <span class="n">df</span> +</pre></div> + + + + +```python +#%load_ext autoreload +#%autoreload 2 +from trr265.data_provider import DataProvider +dp = DataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/') # Path to data folder (containing raw, interim, external, and processed) +dp.get_two_day_data().iloc[:20][['participant','date','MDBF_zufrieden','g_alc']] +``` + + + + +<div> +<style scoped> + .dataframe 
tbody tr th:only-of-type { + vertical-align: middle; + } + + .dataframe tbody tr th { + vertical-align: top; + } + + .dataframe thead th { + text-align: right; + } +</style> +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>participant</th> + <th>date</th> + <th>MDBF_zufrieden</th> + <th>g_alc</th> + </tr> + <tr> + <th>two_day_index</th> + <th></th> + <th></th> + <th></th> + <th></th> + </tr> + </thead> + <tbody> + <tr> + <th>0</th> + <td>b001</td> + <td>2020-02-22</td> + <td>NaN</td> + <td>6.4</td> + </tr> + <tr> + <th>1</th> + <td>b001</td> + <td>2020-02-23</td> + <td>NaN</td> + <td>35.2</td> + </tr> + <tr> + <th>2</th> + <td>b001</td> + <td>2020-02-24</td> + <td>2.0</td> + <td>NaN</td> + </tr> + <tr> + <th>3</th> + <td>b001</td> + <td>2020-02-25</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>4</th> + <td>b001</td> + <td>2020-02-26</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>5</th> + <td>b001</td> + <td>2020-02-27</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>6</th> + <td>b001</td> + <td>2020-02-28</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>7</th> + <td>b001</td> + <td>2020-02-29</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>8</th> + <td>b001</td> + <td>2020-03-01</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>9</th> + <td>b001</td> + <td>2020-03-02</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>10</th> + <td>b001</td> + <td>2020-03-03</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>11</th> + <td>b001</td> + <td>2020-03-04</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>12</th> + <td>b001</td> + <td>2020-03-05</td> + <td>NaN</td> + <td>0.0</td> + </tr> + <tr> + <th>13</th> + <td>b001</td> + <td>2020-03-06</td> + <td>NaN</td> + <td>57.6</td> + </tr> + <tr> + <th>14</th> + <td>b001</td> + <td>2020-03-07</td> + <td>3.0</td> + <td>NaN</td> + </tr> + <tr> + <th>15</th> + <td>b001</td> + <td>2020-03-08</td> + <td>NaN</td> + 
<td>NaN</td> + </tr> + <tr> + <th>16</th> + <td>b001</td> + <td>2020-03-09</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>17</th> + <td>b001</td> + <td>2020-03-10</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>18</th> + <td>b001</td> + <td>2020-03-11</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>19</th> + <td>b001</td> + <td>2020-03-12</td> + <td>NaN</td> + <td>NaN</td> + </tr> + </tbody> +</table> +</div> + + + +## Required data + +### Phone screening +- data/external/b7_participants.xlsx <- from Hilmar +- data/raw/phonescreening.csv <- from redcap +- data/external/phone_codebook.html <- from redcap + +### Basic assessment (from redcap) +- data/raw/ba.csv <- from redcap +- data/external/ba_codebook.html <- from redcap + +### Movisens +- all basic assessment data (see above) +- data/raw/mov_data_b.csv +- data/raw/mov_data_d.csv +- data/raw/mov_data_m.csv +- data/raw/starting_dates_b.csv +- data/raw/starting_dates_d.csv +- data/raw/starting_dates_m.csv +- data/external/alcohol_per_drink.csv <- from Hilmar + + + + +%package help +Summary: Development documents and examples for trr265 +Provides: python3-trr265-doc +%description help +# TRR 265 +> This module handles analysis of the TRR265 data. 
+ + +## Install + +`pip install trr265` + +`pip install biuR` (optional but needed for most analyses) + +## How to use + +```python +from pygments.formatters import HtmlFormatter +from pygments import highlight +import IPython +import inspect +from pygments.lexers import PythonLexer + + +def display_function(the_function): + formatter = HtmlFormatter() + return IPython.display.HTML('<style type="text/css">{}</style>{}'.format( + formatter.get_style_defs('.highlight'), + highlight(inspect.getsource(the_function), PythonLexer(), formatter))) +``` + +```python +display_function(dp.get_mov_data) +``` + + + + +<style type="text/css">pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #408080; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #BC7A00 } /* Comment.Preproc */ +.highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408080; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr 
{ color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #B00040 } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { color: #7D9029 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #A0A000 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #666666 } /* Literal.Number.Bin */ +.highlight .mf { color: 
#666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #BB6688 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #0000FF } /* Name.Function.Magic */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .vm { color: #19177C } /* Name.Variable.Magic */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class="highlight"><pre><span></span><span class="nd">@patch</span> +<span class="nd">@get_efficiently</span> +<span class="k">def</span> <span class="nf">get_mov_data</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span><span class="n">DataProvider</span><span class="p">):</span> + <span class="sd">"""</span> +<span class="sd"> This function gets Movisense data</span> +<span class="sd"> 1) We create unique participant
IDs (e.g. "b001"; this is necessary as sites use overlapping IDs)</span> +<span class="sd"> 2) We merge double IDs, so participants with two IDs only have one (for this duplicate_ids.csv has to be updated)</span> +<span class="sd"> 3) We remove pilot participants</span> +<span class="sd"> 4) We get starting dates (from the participant overviews in movisense; downloaded as html)</span> +<span class="sd"> 5) We calculate sampling days and end dates based on the starting dates</span> +<span class="sd"> """</span> + <span class="c1"># Loading raw data</span> + <span class="n">mov_berlin</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_berlin_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + <span class="n">mov_dresden</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_dresden_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + <span class="n">mov_mannheim</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_mannheim_path</span><span class="p">,</span> <span class="n">sep</span> <span class="o">=</span> <span class="s1">';'</span><span class="p">)</span> + + <span class="c1"># Merging (participant numbers repeat so we add the first letter of location)</span> + <span class="n">mov_berlin</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'berlin'</span>
+ <span class="n">mov_dresden</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'dresden'</span> + <span class="n">mov_mannheim</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'mannheim'</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">mov_berlin</span><span class="p">,</span><span class="n">mov_dresden</span><span class="p">,</span><span class="n">mov_mannheim</span><span class="p">])</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">'location'</span><span class="p">]</span><span class="o">.</span><span class="n">str</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">Participant</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="s1">'</span><span class="si">%03d</span><span class="s1">'</span><span class="o">%</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> + <span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span> <span class="o">=</span> <span class="s1">'Participant'</span><span class="p">,</span> <span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="c1"># Dropping old participant column to avoid mistakes</span> + <span class="n">df</span><span class="p">[</span><span 
class="s1">'trigger_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">Trigger_date</span> <span class="o">+</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">Trigger_time</span><span class="p">)</span> + + <span class="c1"># Merging double IDs (for participants with several movisense IDs)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_duplicate_mov_ids</span><span class="p">())</span> + + <span class="c1"># Removing pilot participants</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="o">~</span><span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">str</span><span class="p">)</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">contains</span><span class="p">(</span><span class="s1">'test'</span><span class="p">)]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="o">~</span><span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="s1">'m157'</span><span class="p">,</span> <span class="s1">'b010'</span><span class="p">,</span> <span class="s1">'b006'</span><span 
class="p">,</span> <span class="s1">'d001'</span><span class="p">,</span> <span class="s1">'d002'</span><span class="p">,</span> <span class="s1">'d042'</span><span class="p">,</span> <span class="s1">'m024'</span><span class="p">,</span> <span class="s1">'m028'</span><span class="p">,</span> <span class="s1">'m071'</span><span class="p">,</span> <span class="s1">'m079'</span><span class="p">,</span> <span class="s1">'m107'</span><span class="p">])]</span> + + + <span class="c1"># Adding starting dates to get sampling days</span> + <span class="k">def</span> <span class="nf">get_starting_dates</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">pp_prefix</span> <span class="o">=</span> <span class="s1">''</span><span class="p">):</span> + <span class="n">soup</span> <span class="o">=</span> <span class="n">bs</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span> + <span class="n">ids</span> <span class="o">=</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">text</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"td"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'simpleId'</span><span class="p">)]</span> + <span class="n">c_dates</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"span"</span><span class="p">)[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'title'</span><span 
class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"td"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'coupleDate'</span><span class="p">)]</span> + <span class="n">s_dates</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">soup</span><span class="o">.</span><span class="n">find_all</span><span class="p">(</span><span class="s2">"input"</span><span class="p">,</span> <span class="n">class_</span> <span class="o">=</span> <span class="s1">'dp startDate'</span><span class="p">)]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">'participant'</span><span class="p">:</span><span class="n">ids</span><span class="p">,</span><span class="s1">'coupling_date'</span><span class="p">:</span><span class="n">c_dates</span><span class="p">,</span><span class="s1">'starting_date'</span><span class="p">:</span><span class="n">s_dates</span><span class="p">})</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'coupling_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">coupling_date</span><span class="p">)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'starting_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span 
class="n">to_datetime</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">starting_date</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">starting_date</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">coupling_date</span><span class="p">,</span><span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'participant'</span><span class="p">]</span> <span class="o">=</span> <span class="n">pp_prefix</span> <span class="o">+</span> <span class="n">df</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="s1">'</span><span class="si">%03d</span><span class="s1">'</span><span class="o">%</span><span class="nb">int</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> + <span class="k">return</span> <span class="n">df</span> + + <span class="n">starting_dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_berlin_starting_dates_path</span><span class="p">,</span> <span class="s1">'b'</span><span class="p">),</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mov_dresden_starting_dates_path</span><span class="p">,</span> <span class="s1">'d'</span><span class="p">),</span> + <span class="n">get_starting_dates</span><span class="p">(</span><span 
class="bp">self</span><span class="o">.</span><span class="n">mov_mannheim_starting_dates_path</span><span class="p">,</span> <span class="s1">'m'</span><span class="p">)</span> + <span class="p">])</span> + <span class="c1"># For participants with several movisense IDs we use the first coupling date</span> + <span class="n">starting_dates</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">get_duplicate_mov_ids</span><span class="p">(),</span> <span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="n">starting_dates</span> <span class="o">=</span> <span class="n">starting_dates</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'participant'</span><span class="p">)[[</span><span class="s1">'starting_date'</span><span class="p">,</span><span class="s1">'coupling_date'</span><span class="p">]]</span><span class="o">.</span><span class="n">min</span><span class="p">()</span><span class="o">.</span><span class="n">reset_index</span><span class="p">()</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">starting_dates</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s2">"participant"</span><span class="p">,</span> <span class="n">how</span> <span class="o">=</span> <span class="s1">'left'</span><span class="p">,</span> <span class="n">indicator</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="c1"># Checking if starting dates were downloaded</span> + <span class="k">if</span> <span class="s2">"left_only"</span> <span class="ow">in</span> <span class="n">df</span><span 
class="o">.</span><span class="n">_merge</span><span class="o">.</span><span class="n">unique</span><span class="p">():</span> + <span class="n">no_starting_dates</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">'_merge == "left_only"'</span><span class="p">)</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"Starting dates missing for participants below. Did you download the participant overviews as html?"</span><span class="p">,</span> <span class="n">no_starting_dates</span><span class="p">)</span> + <span class="c1"># Calculating movisense sampling day, adding date and end_date</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'sampling_day'</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'trigger_date'</span><span class="p">]</span> <span class="o">-</span> <span class="n">df</span><span class="p">[</span><span class="s1">'starting_date'</span><span class="p">])</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">days</span> <span class="o">+</span> <span class="mi">1</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">trigger_date</span><span class="o">.</span><span class="n">dt</span><span class="o">.</span><span class="n">date</span> + <span class="n">df</span><span class="p">[</span><span class="s1">'end_date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">date</span> <span class="o">+</span> <span 
class="n">pd</span><span class="o">.</span><span class="n">DateOffset</span><span class="p">(</span><span class="n">days</span> <span class="o">=</span> <span class="mi">365</span><span class="p">)</span> + <span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="s1">'mov_index'</span><span class="p">,</span><span class="n">inplace</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> + <span class="c1"># Adding redcap IDs</span> + <span class="n">ids_table</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_ba_data</span><span class="p">()[[</span><span class="s1">'participant_id'</span><span class="p">,</span><span class="s1">'mov_id'</span><span class="p">]]</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">'mov_id==mov_id'</span><span class="p">)</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">'mov_id'</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span> + <span class="n">ids_table</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'redcap_id'</span><span class="p">]</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">ids_table</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s1">'participant'</span><span class="p">,</span> <span class="n">right_index</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">how</span> <span class="o">=</span> <span class="s1">'left'</span><span class="p">)</span> + <span class="c1"># Filtering 
out participants with no associated redcap data</span> + <span class="n">no_redcap</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s2">"redcap_id.isna()"</span><span class="p">)</span><span class="o">.</span><span class="n">participant</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> + <span class="nb">print</span><span class="p">(</span><span class="s2">"Participants: </span><span class="si">%s</span><span class="s2"> have no associated redcap IDs and are excluded from the following analyses."</span><span class="o">%</span><span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">no_redcap</span><span class="p">))</span> + <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">redcap_id</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span><span class="o">==</span><span class="kc">False</span><span class="p">]</span> + <span class="k">return</span> <span class="n">df</span> +</pre></div> + + + + +```python +#%load_ext autoreload +#%autoreload 2 +from trr265.data_provider import DataProvider +dp = DataProvider('/Users/hilmarzech/Projects/trr265/trr265/data/') # Path to data folder (containing raw, interim, external, and processed) +dp.get_two_day_data().iloc[:20][['participant','date','MDBF_zufrieden','g_alc']] +``` + + + + +<div> +<style scoped> + .dataframe tbody tr th:only-of-type { + vertical-align: middle; + } + + .dataframe tbody tr th { + vertical-align: top; + } + + .dataframe thead th { + text-align: right; + } +</style> +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>participant</th> + <th>date</th> + <th>MDBF_zufrieden</th> + <th>g_alc</th> + </tr> + <tr> + 
<th>two_day_index</th> + <th></th> + <th></th> + <th></th> + <th></th> + </tr> + </thead> + <tbody> + <tr> + <th>0</th> + <td>b001</td> + <td>2020-02-22</td> + <td>NaN</td> + <td>6.4</td> + </tr> + <tr> + <th>1</th> + <td>b001</td> + <td>2020-02-23</td> + <td>NaN</td> + <td>35.2</td> + </tr> + <tr> + <th>2</th> + <td>b001</td> + <td>2020-02-24</td> + <td>2.0</td> + <td>NaN</td> + </tr> + <tr> + <th>3</th> + <td>b001</td> + <td>2020-02-25</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>4</th> + <td>b001</td> + <td>2020-02-26</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>5</th> + <td>b001</td> + <td>2020-02-27</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>6</th> + <td>b001</td> + <td>2020-02-28</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>7</th> + <td>b001</td> + <td>2020-02-29</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>8</th> + <td>b001</td> + <td>2020-03-01</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>9</th> + <td>b001</td> + <td>2020-03-02</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>10</th> + <td>b001</td> + <td>2020-03-03</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>11</th> + <td>b001</td> + <td>2020-03-04</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>12</th> + <td>b001</td> + <td>2020-03-05</td> + <td>NaN</td> + <td>0.0</td> + </tr> + <tr> + <th>13</th> + <td>b001</td> + <td>2020-03-06</td> + <td>NaN</td> + <td>57.6</td> + </tr> + <tr> + <th>14</th> + <td>b001</td> + <td>2020-03-07</td> + <td>3.0</td> + <td>NaN</td> + </tr> + <tr> + <th>15</th> + <td>b001</td> + <td>2020-03-08</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>16</th> + <td>b001</td> + <td>2020-03-09</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>17</th> + <td>b001</td> + <td>2020-03-10</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>18</th> + <td>b001</td> + <td>2020-03-11</td> + <td>NaN</td> + <td>NaN</td> + </tr> + <tr> + <th>19</th> + <td>b001</td> + <td>2020-03-12</td> + 
<td>NaN</td> + <td>NaN</td> + </tr> + </tbody> +</table> +</div> + + + +## Required data + +### Phone screening +- data/external/b7_participants.xlsx <- from Hilmar +- data/raw/phonescreening.csv <- from redcap +- data/external/phone_codebook.html <- from redcap + +### Basic assessment (from redcap) +- data/raw/ba.csv <- from redcap +- data/external/ba_codebook.html <- from redcap + +### Movisens +- all basic assessment data (see above) +- data/raw/mov_data_b.csv +- data/raw/mov_data_d.csv +- data/raw/mov_data_m.csv +- data/raw/starting_dates_b.csv +- data/raw/starting_dates_d.csv +- data/raw/starting_dates_m.csv +- data/external/alcohol_per_drink.csv <- from Hilmar + + + + +%prep +%autosetup -n trr265-0.0.10 + +%build +%py3_build + +%install +%py3_install +install -d -m755 %{buildroot}/%{_pkgdocdir} # create the package doc directory inside the buildroot +if [ -d doc ]; then cp -arf doc %{buildroot}/%{_pkgdocdir}; fi +if [ -d docs ]; then cp -arf docs %{buildroot}/%{_pkgdocdir}; fi +if [ -d example ]; then cp -arf example %{buildroot}/%{_pkgdocdir}; fi +if [ -d examples ]; then cp -arf examples %{buildroot}/%{_pkgdocdir}; fi +pushd %{buildroot} # paths below are collected relative to the buildroot +if [ -d usr/lib ]; then + find usr/lib -type f -printf "\"/%h/%f\"\n" >> filelist.lst # quote each path so a name with spaces stays one entry +fi +if [ -d usr/lib64 ]; then + find usr/lib64 -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/bin ]; then + find usr/bin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +if [ -d usr/sbin ]; then + find usr/sbin -type f -printf "\"/%h/%f\"\n" >> filelist.lst +fi +touch filelist.lst doclist.lst # both lists must exist for the mv and -f uses below, even when nothing matched +if [ -d usr/share/man ]; then + find usr/share/man -type f -printf "\"/%h/%f.gz\"\n" >> doclist.lst # man pages are listed with a .gz suffix (presumably compressed after this scriptlet; confirm) +fi +popd +mv %{buildroot}/filelist.lst . +mv %{buildroot}/doclist.lst . + +%files -n python3-trr265 -f filelist.lst +%dir %{python3_sitelib}/* + +%files help -f doclist.lst +%{_docdir}/* + +%changelog +* Wed May 31 2023 Python_Bot <Python_Bot@openeuler.org> - 0.0.10-1 +- Package Spec generated @@ -0,0 +1 @@ +5ef95bc8687cd0bda051a1def06c56af trr265-0.0.10.tar.gz |