diff --git a/NEWS.md b/NEWS.md
index b8b2cc2..82e4fd7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -19,6 +19,8 @@ release:
 
 * upgrade website to bootstrap 5
 
+* fix typo in the skew and kurtosis thresholds table of the `ad` vignette
+
 
 # nipnTK 0.1.0
 
diff --git a/docs/articles/ad.html b/docs/articles/ad.html
index a2acae0..81c36aa 100644
--- a/docs/articles/ad.html
+++ b/docs/articles/ad.html
@@ -628,7 +628,7 @@ <h2 id="skew-and-kurtosis">Skew and kurtosis<a class="anchor" aria-label="anchor
 </tr>
 <tr>
 <td style="text-align:center;">
-≥ 0.6 and &lt; 0.6
+≥ 0.4 and &lt; 0.6
 </td>
 <td style="text-align:center;">
 Acceptable
diff --git a/docs/news/index.html b/docs/news/index.html
index 7edaaee..8dfbe8d 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -84,6 +84,7 @@ <h3 id="general-updates-0-1-1-9000">General updates<a class="anchor" aria-label=
 <li><p>add CITATION</p></li>
 <li><p>update CONTRIBUTOR guidelines</p></li>
 <li><p>upgrade website to bootstrap 5</p></li>
+<li><p>fix typo in the skew and kurtosis thresholds table of the <code>ad</code> vignette</p></li>
 </ul></div>
 </div>
     <div class="section level2">
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index c011465..66f5785 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -11,7 +11,7 @@ articles:
   rl: rl.html
   sp: sp.html
   sr: sr.html
-last_built: 2023-01-05T00:31Z
+last_built: 2023-01-05T00:55Z
 urls:
   reference: https://nutriverse.io/nipnTK/reference
   article: https://nutriverse.io/nipnTK/articles
diff --git a/docs/search.json b/docs/search.json
index 0f4acd6..7d4f781 100644
--- a/docs/search.json
+++ b/docs/search.json
@@ -1 +1 @@
-[{"path":[]},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. 
Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement ernest@guevarra.io. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. 
public apology may requested.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. 
Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":[]},{"path":"https://nutriverse.io/nipnTK/CONTRIBUTING.html","id":"bugs","dir":"","previous_headings":"","what":"Bugs","title":"Contributing","text":"Submit issue issues page","code":""},{"path":"https://nutriverse.io/nipnTK/CONTRIBUTING.html","id":"code-contributions","dir":"","previous_headings":"","what":"Code contributions","title":"Contributing","text":"Fork repository Github account Clone version account machine account Make sure track progress upstream .e., version nipnTK nutriverse/nipnTK, making changes make sure pull changes upstream either git fetch upstream merge later git pull upstream fetch merge one step Make changes new feature branch Please write test tests changes affect code just documentation Push changes account Submit pull request nutriverse/nipnTK","code":"git clone https://github.com/<yourgithubusername>/nipnTK.git git remote add upstream https://github.com/nutriverse/nipnTK.git"},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"GNU General Public License","title":"GNU General Public License","text":"Version 3, 29 June 2007Copyright © 2007 Free Software Foundation, Inc. 
<http://fsf.org/> Everyone permitted copy distribute verbatim copies license document, changing allowed.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"preamble","dir":"","previous_headings":"","what":"Preamble","title":"GNU General Public License","text":"GNU General Public License free, copyleft license software kinds works. licenses software practical works designed take away freedom share change works. contrast, GNU General Public License intended guarantee freedom share change versions program–make sure remains free software users. , Free Software Foundation, use GNU General Public License software; applies also work released way authors. can apply programs, . speak free software, referring freedom, price. General Public Licenses designed make sure freedom distribute copies free software (charge wish), receive source code can get want , can change software use pieces new free programs, know can things. protect rights, need prevent others denying rights asking surrender rights. Therefore, certain responsibilities distribute copies software, modify : responsibilities respect freedom others. example, distribute copies program, whether gratis fee, must pass recipients freedoms received. must make sure , , receive can get source code. must show terms know rights. Developers use GNU GPL protect rights two steps: (1) assert copyright software, (2) offer License giving legal permission copy, distribute /modify . developers’ authors’ protection, GPL clearly explains warranty free software. users’ authors’ sake, GPL requires modified versions marked changed, problems attributed erroneously authors previous versions. devices designed deny users access install run modified versions software inside , although manufacturer can . fundamentally incompatible aim protecting users’ freedom change software. systematic pattern abuse occurs area products individuals use, precisely unacceptable. Therefore, designed version GPL prohibit practice products. 
problems arise substantially domains, stand ready extend provision domains future versions GPL, needed protect freedom users. Finally, every program threatened constantly software patents. States allow patents restrict development use software general-purpose computers, , wish avoid special danger patents applied free program make effectively proprietary. prevent , GPL assures patents used render program non-free. precise terms conditions copying, distribution modification follow.","code":""},{"path":[]},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_0-definitions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"0. Definitions","title":"GNU General Public License","text":"“License” refers version 3 GNU General Public License. “Copyright” also means copyright-like laws apply kinds works, semiconductor masks. “Program” refers copyrightable work licensed License. licensee addressed “”. “Licensees” “recipients” may individuals organizations. “modify” work means copy adapt part work fashion requiring copyright permission, making exact copy. resulting work called “modified version” earlier work work “based ” earlier work. “covered work” means either unmodified Program work based Program. “propagate” work means anything , without permission, make directly secondarily liable infringement applicable copyright law, except executing computer modifying private copy. Propagation includes copying, distribution (without modification), making available public, countries activities well. “convey” work means kind propagation enables parties make receive copies. Mere interaction user computer network, transfer copy, conveying. interactive user interface displays “Appropriate Legal Notices” extent includes convenient prominently visible feature (1) displays appropriate copyright notice, (2) tells user warranty work (except extent warranties provided), licensees may convey work License, view copy License. 
interface presents list user commands options, menu, prominent item list meets criterion.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_1-source-code","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"1. Source Code","title":"GNU General Public License","text":"“source code” work means preferred form work making modifications . “Object code” means non-source form work. “Standard Interface” means interface either official standard defined recognized standards body, , case interfaces specified particular programming language, one widely used among developers working language. “System Libraries” executable work include anything, work whole, () included normal form packaging Major Component, part Major Component, (b) serves enable use work Major Component, implement Standard Interface implementation available public source code form. “Major Component”, context, means major essential component (kernel, window system, ) specific operating system () executable work runs, compiler used produce work, object code interpreter used run . “Corresponding Source” work object code form means source code needed generate, install, (executable work) run object code modify work, including scripts control activities. However, include work’s System Libraries, general-purpose tools generally available free programs used unmodified performing activities part work. example, Corresponding Source includes interface definition files associated source files work, source code shared libraries dynamically linked subprograms work specifically designed require, intimate data communication control flow subprograms parts work. Corresponding Source need include anything users can regenerate automatically parts Corresponding Source. Corresponding Source work source code form work.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_2-basic-permissions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"2. 
Basic Permissions","title":"GNU General Public License","text":"rights granted License granted term copyright Program, irrevocable provided stated conditions met. License explicitly affirms unlimited permission run unmodified Program. output running covered work covered License output, given content, constitutes covered work. License acknowledges rights fair use equivalent, provided copyright law. may make, run propagate covered works convey, without conditions long license otherwise remains force. may convey covered works others sole purpose make modifications exclusively , provide facilities running works, provided comply terms License conveying material control copyright. thus making running covered works must exclusively behalf, direction control, terms prohibit making copies copyrighted material outside relationship . Conveying circumstances permitted solely conditions stated . Sublicensing allowed; section 10 makes unnecessary.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_3-protecting-users-legal-rights-from-anti-circumvention-law","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"3. Protecting Users’ Legal Rights From Anti-Circumvention Law","title":"GNU General Public License","text":"covered work shall deemed part effective technological measure applicable law fulfilling obligations article 11 WIPO copyright treaty adopted 20 December 1996, similar laws prohibiting restricting circumvention measures. convey covered work, waive legal power forbid circumvention technological measures extent circumvention effected exercising rights License respect covered work, disclaim intention limit operation modification work means enforcing, work’s users, third parties’ legal rights forbid circumvention technological measures.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_4-conveying-verbatim-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"4. 
Conveying Verbatim Copies","title":"GNU General Public License","text":"may convey verbatim copies Program’s source code receive , medium, provided conspicuously appropriately publish copy appropriate copyright notice; keep intact notices stating License non-permissive terms added accord section 7 apply code; keep intact notices absence warranty; give recipients copy License along Program. may charge price price copy convey, may offer support warranty protection fee.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_5-conveying-modified-source-versions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"5. Conveying Modified Source Versions","title":"GNU General Public License","text":"may convey work based Program, modifications produce Program, form source code terms section 4, provided also meet conditions: ) work must carry prominent notices stating modified , giving relevant date. b) work must carry prominent notices stating released License conditions added section 7. requirement modifies requirement section 4 “keep intact notices”. c) must license entire work, whole, License anyone comes possession copy. License therefore apply, along applicable section 7 additional terms, whole work, parts, regardless packaged. License gives permission license work way, invalidate permission separately received . d) work interactive user interfaces, must display Appropriate Legal Notices; however, Program interactive interfaces display Appropriate Legal Notices, work need make . compilation covered work separate independent works, nature extensions covered work, combined form larger program, volume storage distribution medium, called “aggregate” compilation resulting copyright used limit access legal rights compilation’s users beyond individual works permit. 
Inclusion covered work aggregate cause License apply parts aggregate.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_6-conveying-non-source-forms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"6. Conveying Non-Source Forms","title":"GNU General Public License","text":"may convey covered work object code form terms sections 4 5, provided also convey machine-readable Corresponding Source terms License, one ways: ) Convey object code , embodied , physical product (including physical distribution medium), accompanied Corresponding Source fixed durable physical medium customarily used software interchange. b) Convey object code , embodied , physical product (including physical distribution medium), accompanied written offer, valid least three years valid long offer spare parts customer support product model, give anyone possesses object code either (1) copy Corresponding Source software product covered License, durable physical medium customarily used software interchange, price reasonable cost physically performing conveying source, (2) access copy Corresponding Source network server charge. c) Convey individual copies object code copy written offer provide Corresponding Source. alternative allowed occasionally noncommercially, received object code offer, accord subsection 6b. d) Convey object code offering access designated place (gratis charge), offer equivalent access Corresponding Source way place charge. need require recipients copy Corresponding Source along object code. place copy object code network server, Corresponding Source may different server (operated third party) supports equivalent copying facilities, provided maintain clear directions next object code saying find Corresponding Source. Regardless server hosts Corresponding Source, remain obligated ensure available long needed satisfy requirements. 
e) Convey object code using peer--peer transmission, provided inform peers object code Corresponding Source work offered general public charge subsection 6d. separable portion object code, whose source code excluded Corresponding Source System Library, need included conveying object code work. “User Product” either (1) “consumer product”, means tangible personal property normally used personal, family, household purposes, (2) anything designed sold incorporation dwelling. determining whether product consumer product, doubtful cases shall resolved favor coverage. particular product received particular user, “normally used” refers typical common use class product, regardless status particular user way particular user actually uses, expects expected use, product. product consumer product regardless whether product substantial commercial, industrial non-consumer uses, unless uses represent significant mode use product. “Installation Information” User Product means methods, procedures, authorization keys, information required install execute modified versions covered work User Product modified version Corresponding Source. information must suffice ensure continued functioning modified object code case prevented interfered solely modification made. convey object code work section , , specifically use , User Product, conveying occurs part transaction right possession use User Product transferred recipient perpetuity fixed term (regardless transaction characterized), Corresponding Source conveyed section must accompanied Installation Information. requirement apply neither third party retains ability install modified object code User Product (example, work installed ROM). requirement provide Installation Information include requirement continue provide support service, warranty, updates work modified installed recipient, User Product modified installed. 
Access network may denied modification materially adversely affects operation network violates rules protocols communication across network. Corresponding Source conveyed, Installation Information provided, accord section must format publicly documented (implementation available public source code form), must require special password key unpacking, reading copying.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_7-additional-terms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"7. Additional Terms","title":"GNU General Public License","text":"“Additional permissions” terms supplement terms License making exceptions one conditions. Additional permissions applicable entire Program shall treated though included License, extent valid applicable law. additional permissions apply part Program, part may used separately permissions, entire Program remains governed License without regard additional permissions. convey copy covered work, may option remove additional permissions copy, part . (Additional permissions may written require removal certain cases modify work.) may place additional permissions material, added covered work, can give appropriate copyright permission. 
Notwithstanding provision License, material add covered work, may (authorized copyright holders material) supplement terms License terms: ) Disclaiming warranty limiting liability differently terms sections 15 16 License; b) Requiring preservation specified reasonable legal notices author attributions material Appropriate Legal Notices displayed works containing ; c) Prohibiting misrepresentation origin material, requiring modified versions material marked reasonable ways different original version; d) Limiting use publicity purposes names licensors authors material; e) Declining grant rights trademark law use trade names, trademarks, service marks; f) Requiring indemnification licensors authors material anyone conveys material (modified versions ) contractual assumptions liability recipient, liability contractual assumptions directly impose licensors authors. non-permissive additional terms considered “restrictions” within meaning section 10. Program received , part , contains notice stating governed License along term restriction, may remove term. license document contains restriction permits relicensing conveying License, may add covered work material governed terms license document, provided restriction survive relicensing conveying. add terms covered work accord section, must place, relevant source files, statement additional terms apply files, notice indicating find applicable terms. Additional terms, permissive non-permissive, may stated form separately written license, stated exceptions; requirements apply either way.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_8-termination","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"8. Termination","title":"GNU General Public License","text":"may propagate modify covered work except expressly provided License. attempt otherwise propagate modify void, automatically terminate rights License (including patent licenses granted third paragraph section 11). 
However, cease violation License, license particular copyright holder reinstated () provisionally, unless copyright holder explicitly finally terminates license, (b) permanently, copyright holder fails notify violation reasonable means prior 60 days cessation. Moreover, license particular copyright holder reinstated permanently copyright holder notifies violation reasonable means, first time received notice violation License (work) copyright holder, cure violation prior 30 days receipt notice. Termination rights section terminate licenses parties received copies rights License. rights terminated permanently reinstated, qualify receive new licenses material section 10.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_9-acceptance-not-required-for-having-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"9. Acceptance Not Required for Having Copies","title":"GNU General Public License","text":"required accept License order receive run copy Program. Ancillary propagation covered work occurring solely consequence using peer--peer transmission receive copy likewise require acceptance. However, nothing License grants permission propagate modify covered work. actions infringe copyright accept License. Therefore, modifying propagating covered work, indicate acceptance License .","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_10-automatic-licensing-of-downstream-recipients","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"10. Automatic Licensing of Downstream Recipients","title":"GNU General Public License","text":"time convey covered work, recipient automatically receives license original licensors, run, modify propagate work, subject License. responsible enforcing compliance third parties License. “entity transaction” transaction transferring control organization, substantially assets one, subdividing organization, merging organizations. 
propagation covered work results entity transaction, party transaction receives copy work also receives whatever licenses work party’s predecessor interest give previous paragraph, plus right possession Corresponding Source work predecessor interest, predecessor can get reasonable efforts. may impose restrictions exercise rights granted affirmed License. example, may impose license fee, royalty, charge exercise rights granted License, may initiate litigation (including cross-claim counterclaim lawsuit) alleging patent claim infringed making, using, selling, offering sale, importing Program portion .","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_11-patents","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"11. Patents","title":"GNU General Public License","text":"“contributor” copyright holder authorizes use License Program work Program based. work thus licensed called contributor’s “contributor version”. contributor’s “essential patent claims” patent claims owned controlled contributor, whether already acquired hereafter acquired, infringed manner, permitted License, making, using, selling contributor version, include claims infringed consequence modification contributor version. purposes definition, “control” includes right grant patent sublicenses manner consistent requirements License. contributor grants non-exclusive, worldwide, royalty-free patent license contributor’s essential patent claims, make, use, sell, offer sale, import otherwise run, modify propagate contents contributor version. following three paragraphs, “patent license” express agreement commitment, however denominated, enforce patent (express permission practice patent covenant sue patent infringement). “grant” patent license party means make agreement commitment enforce patent party. 
convey covered work, knowingly relying patent license, Corresponding Source work available anyone copy, free charge terms License, publicly available network server readily accessible means, must either (1) cause Corresponding Source available, (2) arrange deprive benefit patent license particular work, (3) arrange, manner consistent requirements License, extend patent license downstream recipients. “Knowingly relying” means actual knowledge , patent license, conveying covered work country, recipient’s use covered work country, infringe one identifiable patents country reason believe valid. , pursuant connection single transaction arrangement, convey, propagate procuring conveyance , covered work, grant patent license parties receiving covered work authorizing use, propagate, modify convey specific copy covered work, patent license grant automatically extended recipients covered work works based . patent license “discriminatory” include within scope coverage, prohibits exercise , conditioned non-exercise one rights specifically granted License. may convey covered work party arrangement third party business distributing software, make payment third party based extent activity conveying work, third party grants, parties receive covered work , discriminatory patent license () connection copies covered work conveyed (copies made copies), (b) primarily connection specific products compilations contain covered work, unless entered arrangement, patent license granted, prior 28 March 2007. Nothing License shall construed excluding limiting implied license defenses infringement may otherwise available applicable patent law.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_12-no-surrender-of-others-freedom","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"12. 
No Surrender of Others’ Freedom","title":"GNU General Public License","text":"conditions imposed (whether court order, agreement otherwise) contradict conditions License, excuse conditions License. convey covered work satisfy simultaneously obligations License pertinent obligations, consequence may convey . example, agree terms obligate collect royalty conveying convey Program, way satisfy terms License refrain entirely conveying Program.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_13-use-with-the-gnu-affero-general-public-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"13. Use with the GNU Affero General Public License","title":"GNU General Public License","text":"Notwithstanding provision License, permission link combine covered work work licensed version 3 GNU Affero General Public License single combined work, convey resulting work. terms License continue apply part covered work, special requirements GNU Affero General Public License, section 13, concerning interaction network apply combination .","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_14-revised-versions-of-this-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"14. Revised Versions of this License","title":"GNU General Public License","text":"Free Software Foundation may publish revised /new versions GNU General Public License time time. new versions similar spirit present version, may differ detail address new problems concerns. version given distinguishing version number. Program specifies certain numbered version GNU General Public License “later version” applies , option following terms conditions either numbered version later version published Free Software Foundation. Program specify version number GNU General Public License, may choose version ever published Free Software Foundation. 
Program specifies proxy can decide future versions GNU General Public License can used, proxy’s public statement acceptance version permanently authorizes choose version Program. Later license versions may give additional different permissions. However, additional obligations imposed author copyright holder result choosing follow later version.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_15-disclaimer-of-warranty","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"15. Disclaimer of Warranty","title":"GNU General Public License","text":"WARRANTY PROGRAM, EXTENT PERMITTED APPLICABLE LAW. EXCEPT OTHERWISE STATED WRITING COPYRIGHT HOLDERS /PARTIES PROVIDE PROGRAM “” WITHOUT WARRANTY KIND, EITHER EXPRESSED IMPLIED, INCLUDING, LIMITED , IMPLIED WARRANTIES MERCHANTABILITY FITNESS PARTICULAR PURPOSE. ENTIRE RISK QUALITY PERFORMANCE PROGRAM . PROGRAM PROVE DEFECTIVE, ASSUME COST NECESSARY SERVICING, REPAIR CORRECTION.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_16-limitation-of-liability","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"16. Limitation of Liability","title":"GNU General Public License","text":"EVENT UNLESS REQUIRED APPLICABLE LAW AGREED WRITING COPYRIGHT HOLDER, PARTY MODIFIES /CONVEYS PROGRAM PERMITTED , LIABLE DAMAGES, INCLUDING GENERAL, SPECIAL, INCIDENTAL CONSEQUENTIAL DAMAGES ARISING USE INABILITY USE PROGRAM (INCLUDING LIMITED LOSS DATA DATA RENDERED INACCURATE LOSSES SUSTAINED THIRD PARTIES FAILURE PROGRAM OPERATE PROGRAMS), EVEN HOLDER PARTY ADVISED POSSIBILITY DAMAGES.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_17-interpretation-of-sections-15-and-16","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"17. 
Interpretation of Sections 15 and 16","title":"GNU General Public License","text":"disclaimer warranty limitation liability provided given local legal effect according terms, reviewing courts shall apply local law closely approximates absolute waiver civil liability connection Program, unless warranty assumption liability accompanies copy Program return fee. END TERMS CONDITIONS","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"how-to-apply-these-terms-to-your-new-programs","dir":"","previous_headings":"","what":"How to Apply These Terms to Your New Programs","title":"GNU General Public License","text":"develop new program, want greatest possible use public, best way achieve make free software everyone can redistribute change terms. , attach following notices program. safest attach start source file effectively state exclusion warranty; file least “copyright” line pointer full notice found. Also add information contact electronic paper mail. program terminal interaction, make output short notice like starts interactive mode: hypothetical commands show w show c show appropriate parts General Public License. course, program’s commands might different; GUI interface, use “box”. also get employer (work programmer) school, , sign “copyright disclaimer” program, necessary. information , apply follow GNU GPL, see <http://www.gnu.org/licenses/>. GNU General Public License permit incorporating program proprietary programs. program subroutine library, may consider useful permit linking proprietary applications library. want , use GNU Lesser General Public License instead License. 
first, please read <http://www.gnu.org/philosophy/--lgpl.html>.","code":"<one line to give the program's name and a brief idea of what it does.> Copyright (C) 2020 Mark Myatt and Ernest Guevarra  This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.  You should have received a copy of the GNU General Public License along with this program.  If not, see <http://www.gnu.org/licenses/>. nipnTK Copyright (C) 2020 Mark Myatt and Ernest Guevarra This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free software, and you are welcome to redistribute it under certain conditions; type 'show c' for details."},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"assessing-the-distribution-of-anthropometric-variables-indices-and-indicators","dir":"Articles","previous_headings":"","what":"Assessing the distribution of anthropometric variables, indices, and indicators","title":"Distributions of variables and indices","text":"section examine distribution anthropometric variables (e.g. weight, height, MUAC), anthropometric indices (e.g. WHZ, HAZ, WHZ), anthropometric indicators (e.g. wasted, stunted, underweight). Topics distribution age, age sex, age-heaping, digit preference covered sections toolkit. 
retrieve survey dataset: file dist.ex01.csv comma-separated-value (CSV) file containing anthropometric data SMART survey Kabul, Afghanistan.","code":"svy <- read.table(\"dist.ex01.csv\", header = TRUE, sep = \",\") head(svy)"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"numerical-summaries","dir":"Articles","previous_headings":"","what":"Numerical summaries","title":"Distributions of variables and indices","text":"summary() function R provides six-figure summary (.e. minimum, first quartile, median, means, third quartile, maximum) numeric variable. example: returns: six-figure summary report standard deviation. sd() function R calculates standard deviation. example: returns: sd() function may return NA. happen missing values specified variable. happens can instruct function ignore missing values: returns value: Using na.rm parameter way (.e. specifying na.rm = TRUE) works many descriptive functions R (see table ). descriptive functions R","code":"summary(svy$weight) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    4.90    9.00   11.00   11.13   13.10   20.70 sd(svy$weight) #> [1] 2.802065 sd(svy$weight, na.rm = TRUE) #> [1] 2.802065"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"graphical-and-numerical-summaries","dir":"Articles","previous_headings":"","what":"Graphical and numerical summaries","title":"Distributions of variables and indices","text":"Numerical summaries useful checking data within expected range. Graphical methods often informative numerical summaries. key graphical method examining distribution variable histogram. example: displays histogram weight variable example dataset (see figure ).  need careful examining distribution measurements, may vary sex. example: display heights males females. , display two separate distributions single distribution.  
case sensible look data males data females using separate histograms:  using box-plot:  Numerical summaries can also used: returns:","code":"hist(svy$weight) hist(svy$height) hist(svy$height[svy$sex == 1])  hist(svy$height[svy$sex == 2]) boxplot(svy$height ~ svy$sex, names = c(\"M\", \"F\"),          xlab = \"Sex\", ylab = \"Height (cm)\", main = \"Height by sex\") by(svy$height, svy$sex, summary) #> svy$sex: 1 #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>   56.20   75.00   81.95   82.49   90.00  110.50  #> ------------------------------------------------------------  #> svy$sex: 2 #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>   58.00   73.25   80.30   81.30   88.95  109.50"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"normal-distributions","dir":"Articles","previous_headings":"","what":"Normal distributions","title":"Distributions of variables and indices","text":"anthropometric variables indices usually expect symmetrical (nearly symmetrical) “bell-shaped” distribution. variables indices interest usually: plots shown .  Histograms showing distribution anthropometric indices example dataset number size “intervals” (breaks) used plotting histogram calculated produce useful plot. intervals used based range data. can specify different set breaks hist() function use. example:  calculates intervals using standard deviation sample size. :  calculates intervals using inter-quartile range. :  use 40 intervals. :  uses intervals 0.5 z-scores wide full range haz. plots show nearly symmetrical “bell-shaped” distributions. ideal symmetrical “bell-shaped” distribution normal distribution. number ways assessing whether variable normally distributed. first way assessing whether variable normally distributed simple “-eye” assessment already done using histograms. NiPN data quality toolkit provides R language function called histNormal() can help “-eye” assessments superimposing normal curve histogram variable interest: plots shown . 
variables appear approximately normally distributed.  Histograms anthropometric indices normal curves superimposed Changing breaks parameter may make histogram easier “read”. example:  Another graphical method assessing whether variable normally distributed normal quantile-quantile plot. easy produce using R. NiPN data quality toolkit provided helper function called qqNormalPlot() produces slightly enhanced normal quantile-quantile plot:  plot shown (annotations). example tails distribution contain cases expected perfectly normally distributed variable. Annotated normal quantile-quantile plot whz variable example dataset examine relevant variables: plots shown . evidence small deviations normality muac, haz, whz.  Normal quantile-quantile plots anthropometric indices example dataset final way assessing normality use formal statistical significance test. preferred test Shapiro-Wilk test normality: tests indicate muac, haz, whz significantly non-normal. Examination histograms normal quantile-quantile plots show deviation normality indices particular marked. indices symmetrical, nearly symmetrical, “bell-shaped” distributions. need careful using significance tests Shapiro-Wilk test normality results can strongly influenced sample size. Small sample sizes can lead tests missing large effects large sample sizes can lead tests identifying small effects highly significant. analysis found highly significant small deviations normality probably detected significance test smaller sample size used. can simulate considerably smaller sample size taking, example, every fourth muac value: Inspecting smaller sample graphically:  yields results similar found complete sample used, formal test: longer significant p < 0.05. distribution appears normal (.e. symmetrical, nearly symmetrical, “bell-shaped” distribution) usually safe assume normality use statistical procedures assume normality. Formal tests normality can misleading sample sizes hundred cases used. 
Graphical methods useful sample sizes small. Formal test useful sample sizes large. sample sizes anthropometry surveys large enough cause formal tests normality identify small deviations normality highly significant.","code":"hist(svy$muac)  hist(svy$haz)  hist(svy$waz)  hist(svy$whz) hist(svy$haz, breaks = \"scott\") hist(svy$haz, breaks = \"FD\") hist(svy$haz, breaks = 40) hist(svy$haz,       breaks = seq(from = floor(min(svy$haz)), to = ceiling(max(svy$haz)), by = 0.5)) histNormal(svy$muac) histNormal(svy$haz) histNormal(svy$waz) histNormal(svy$whz) histNormal(svy$haz, breaks = 15) qqNormalPlot(svy$whz) qqNormalPlot(svy$muac)  qqNormalPlot(svy$haz)  qqNormalPlot(svy$waz)  qqNormalPlot(svy$whz) shapiro.test(svy$muac)  #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$muac #> W = 0.99496, p-value = 0.005495 shapiro.test(svy$haz)  #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$haz #> W = 0.99348, p-value = 0.0007455 shapiro.test(svy$waz)  #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$waz #> W = 0.99827, p-value = 0.5358 shapiro.test(svy$whz) #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$whz #> W = 0.99078, p-value = 2.777e-05 length(svy$muac) #> [1] 873 oneQuarter <- svy$muac[seq(from = 1, to = length(svy$muac), by = 4)]  length(oneQuarter) #> [1] 219 histNormal(oneQuarter)  qqNormalPlot(oneQuarter) shapiro.test(oneQuarter) #>  #>  Shapiro-Wilk normality test #>  #> data:  oneQuarter #> W = 0.98836, p-value = 0.0724"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"skew-and-kurtosis","dir":"Articles","previous_headings":"","what":"Skew and kurtosis","title":"Distributions of variables and indices","text":"Skew measure asymmetry distribution mean. Skew can zero, positive, negative. Zero skew found distribution perfectly symmetrical. Positive skew found long right tail distribution mass distribution concentrated left. Negative skew found long left tail distribution mass distribution concentrated right. 
can usually see skew histograms. can also calculate skewness statistic test significantly different zero. Kurtosis measure much distribution concentrated mean. Kurtosis can zero, positive, negative. Zero kurtosis found variable normally distributed. Positive kurtosis found mass distribution concentrated mean values far mean. Negative kurtosis found mass distribution concentrated tails distribution. can usually see kurtosis histograms. can also calculate kurtosis statistic test significantly different zero. NiPN data quality toolkit provides R language function called skewKurt() calculates skewness kurtosis statistics tests whether differ significantly zero. apply skewKurt() function muac variable example dataset: returns: positive skew negative kurtosis. Neither significantly different zero. Applying skewKurt() function haz variable example dataset: returns: positive skew positive kurtosis. skew significantly different zero. skew can seen histogram:  Applying skewKurt() function waz variable example dataset: returns: negative skew positive kurtosis. Neither significantly different zero. Applying skewKurt() function whz variable example dataset: returns: positive skew positive kurtosis. kurtosis significantly different zero. kurtosis can seen histogram:  tall central columns exceed expected values shown overlaid normal distribution. Skew kurtosis used SMART plausibility checks. Table shows skew kurtosis statistics applied SMART. range absolute values skewness kurtosis statistics applied SMART (2015) whz variable example dataset considered “problematic” according scheme kurtosis 0.6. Care exercised using statistical significance tests classify data “problematic”. use thresholds ranges skew kurtosis statistics usually better approach relying tests statistical significance. Significance tests can strongly affected sample size. Small sample sizes can lead tests missing large effects large sample sizes can lead tests identifying small effects highly significant. 
distribution appears normal (.e. symmetrical, nearly symmetrical, “bell-shaped” distribution) usually safe assume normality use statistical procedures assume normality. important remember normal distribution mathematical abstraction. nothing compelling real world conform normal distribution. normal distribution become reified: Everyone sure [normal distribution] … experimentalists believe mathematical theorem, mathematicians experimentally determined fact.  — Henri Poincaré (1912), Calcul des Probabilités data see may representative reality even fails tests normality. Tests normality useful selecting statistical methods rely normality. less useful determining data quality. data follows symmetrical, nearly symmetrical, “bell-shaped” distribution usually safe use.","code":"skewKurt(svy$muac) #>  #>  Skewness and kurtosis #>  #> Skewness = +0.0525   SE = 0.0828 z = 0.6348  p = 0.5256 #> Kurtosis = -0.2412   SE = 0.1653 z = 1.4586  p = 0.1447 skewKurt(svy$haz) #>  #>  Skewness and kurtosis #>  #> Skewness = +0.3074   SE = 0.0828 z = 3.7149  p = 0.0002 #> Kurtosis = +0.2074   SE = 0.1653 z = 1.2545  p = 0.2097 histNormal(svy$haz, breaks = \"scott\") skewKurt(svy$waz) #>  #>  Skewness and kurtosis #>  #> Skewness = -0.0128   SE = 0.0828 z = 0.1541  p = 0.8775 #> Kurtosis = +0.1805   SE = 0.1653 z = 1.0919  p = 0.2749 skewKurt(svy$whz) #>  #>  Skewness and kurtosis #>  #> Skewness = +0.0823   SE = 0.0828 z = 0.9946  p = 0.3199 #> Kurtosis = +0.7528   SE = 0.1653 z = 4.5530  p = 0.0000 histNormal(svy$whz, breaks = \"scott\")"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"deviation-from-normality","dir":"Articles","previous_headings":"","what":"Deviation from normality","title":"Distributions of variables and indices","text":"anthropometric survey methods (e.g. SMART) use deviations perfect normality indicator poor data quality. sensible approach deviations normality necessarily due poor quality data; can due sampling mixed population. 
easy demonstrate simulated data. assume population consisting two groups: Group 1 : 75% population, mean = -0.48, sd = 0.87 Group 2 : 25% population, mean = -1.04, sd = 1.10 take sample size = 1000 whole population. can simulate : distributions two subgroups (g1 g2) normally distributed:   distribution entire sample (g1g2) normal:  Shapiro-Wilk test normality returns: statistically significant negative skew: , however, nothing wrong sample data distribution entire sample (g1g2) called “mixture Gaussians” (term “Gaussian” refers normal distribution context). can see mixture Gaussians :  case mixture already known. number methods revealing underlying mixture components mixture unknown. techniques covered toolkit. , however, continue example components mixture suspected. expect see small deviations normality survey datasets. often case survey samples subjects wide area covering, example, several agro-ecological zones, socio-economic groups, ethnic groups. almost always case, particularly large surveys DHS, MICS, national SMART surveys. Another reason non-normality one () survey teams systematic bias making measurement. Identifying “offending” survey team examining testing normality separately combinations data \\(n ~ – ~ 1\\) survey teams can attempted. (e.g.) three teams need separately test data : Team 1 Team 2 (Team 3 excluded) Team 1 Team 3 (Team 2 excluded) Team 2 Team 3 (Team 1 excluded) see deviation normality disappears particular team’s data excluded. , however, problem type analysis. cluster-sampled surveys, teams often sample adjacent primary sampling units (clusters). occurs “exclude one team” analysis distinguish differences due spatial heterogeneity (.e. 
patchiness) differences due team systematic measurement bias.","code":"set.seed(0) g1 <- rnorm(n = 750, mean = -0.48, sd = 0.87)  g2 <- rnorm(n = 250, mean = -1.04, sd = 1.11)  g1g2 <- c(g1, g2) histNormal(g1)  qqNormalPlot(g1) shapiro.test(g1)  skewKurt(g1) #>  #>  Shapiro-Wilk normality test #>  #> data:  g1 #> W = 0.99725, p-value = 0.2411 #>  #>  Skewness and kurtosis #>  #> Skewness = +0.1149   SE = 0.0893 z = 1.2867  p = 0.1982 #> Kurtosis = -0.1869   SE = 0.1783 z = 1.0483  p = 0.2945 histNormal(g2)  qqNormalPlot(g2)  shapiro.test(g2)  skewKurt(g2) #>  #>  Shapiro-Wilk normality test #>  #> data:  g2 #> W = 0.9947, p-value = 0.5363 #>  #>  Skewness and kurtosis #>  #> Skewness = +0.0317   SE = 0.1540 z = 0.2058  p = 0.8369 #> Kurtosis = -0.1282   SE = 0.3068 z = 0.4178  p = 0.6761 histNormal(g1g2)  qqNormalPlot(g1g2)  shapiro.test(g1g2)  skewKurt(g1g2) #>  #>  Shapiro-Wilk normality test #>  #> data:  g1g2 #> W = 0.99671, p-value = 0.03514 #>  #>  Skewness and kurtosis #>  #> Skewness = -0.1767   SE = 0.0773 z = 2.2851  p = 0.0223 #> Kurtosis = +0.2894   SE = 0.1545 z = 1.8728  p = 0.0611 hist(g1, col=rgb(0.2, 0.2, 0.2, 0.5),      breaks = seq(-5, 3, 0.5), xlab = \"\", main = \"\") hist(g2, col=rgb(0.8, 0.8, 0.8, 0.5), breaks = seq(-5, 3, 0.5), add = TRUE)  title(main = \"Histogram of g1 and g2\", xlab = \"z-score\")"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"the-standard-deviation-and-alternatives","dir":"Articles","previous_headings":"","what":"The standard deviation and alternatives","title":"Distributions of variables and indices","text":"standard deviation sometimes considered useful measure data quality applied z-scores. can use sd() function find standard deviation. example: returns: 1.323469 may produce misleading values applied raw data. procedure applied cleaned data erroneous data flagged records censored. 
SMART guidelines state acceptable range standard deviation weight--height z-scores (whz) 0.8 1.2 flagging criteria applied flagged records censored. Standard deviations outside range considered indicate poor survey quality. Note SMART define threshold anthropometric indices weight--height z-scores. important note standard deviation 1.2 may due sampling mixed population rather due poor data quality. flag column example dataset contains flagging code codes 2, 3, 6, 7 indicate potential problems weight / height. calculate standard deviation whz variable using data records flagging codes censored oedema recorded: ! character specifies logical “”. standard deviation , therefore, calculated using records flag variable contain 2, 3, 6, 7 oedema recorded present. standard deviation whz flagged records oedema cases censored : within SMART acceptable range 0.8 1.2. problem using standard deviation raw data non-robust statistic. means can strongly influenced outliers. example: returns: Adding single outlier (e.g. data entered 7.84 rather 4.78): returns: example single outlier strongly influenced standard deviation. number robust estimators standard deviation. R provides mad() function calculate adjusted median absolute deviation (MAD). median absolute deviation (MAD) defined median absolute deviations median. median absolute values differences individual data points median data: \\[ MAD ~ = ~ ( | x_i ~ - ~ median(x) | ) \\] calculated MAD adjusted make consistent standard deviation: \\[ \\hat{\\sigma} ~ = ~ k ~ \\times ~ MAD  \\] k constant scaling factor, depends upon distribution. normal distribution: \\[ k ~ = ~ 1.4826 \\] mad() function R function returns adjusted MAD: \\[ \\hat{\\sigma} ~ = ~ 1.4826 ~ \\times ~ MAD \\] robust estimate standard deviation. estimator preferred sample taken mixed population (almost always case) distribution “fat” “heavy” tails, case whz variable example dataset. 
Using mad() function raw WHZ data: returns: usually want calculate adjusted MAD whz variable using data records flagging codes relevant whz cases oedema censored: returns: use standard deviation robust equivalents adjusted MAD simple thresholds problematic. Data mixture Gaussians distributions tend large standard deviations even systematic error nothing wrong sample. Checks standard deviation large surveys , therefore, performed smallest spatial strata PSU cluster level. reduces eliminate problem sampling mixed populations. retrieve dataset examine within-strata MADs: file flag.ex03.csv comma-separated-value (CSV) file containing anthropometric data national SMART survey Nigeria. data stored file flag.ex03.csv collected using methods similar MICS DHS surveys. difference survey concentrated anthropometric data children aged 6 59 months. exercise concentrate WHZ. Data stratified region state within region. create new variable combines region state: can examine adjusted MAD whz combination region state survey dataset using: long output can made compact, easier read, easier work : saved mads object can summarised: returns: table can also useful: example adjusted MAD whz variable within limits 0.8 1.2 combinations region state. Note combined region state. avoid potential problems duplicate state names (.e. state name used one region). previous exercise used raw (.e. without flagging) data. better use data records flagging codes relevant whz cases oedema censored. national SMART survey use SMART flagging criteria. use national.SMART() function add SMART flags survey dataset: need exclude records flagging codes relevant whz: Note oedema recorded dataset exclude oedema cases. 
can now calculate MAD whz stratum: saved mads object can summarised: returns: analysis adjusted MAD whz variable within limits 0.8 1.2 combinations region state.","code":"sd(svy$whz) #> [1] 1.323469 sd(svy$whz[!(svy$flag %in% c(2, 3, 6, 7) | svy$oedema == 1)]) #> [1] 1.141944 sd(c(4.55, 5.93, 2.68, 5.61, 3.53, 4.78, 3.60, 5.82, 4.41, 5.42)) #> [1] 1.097533 sd(c(4.55, 5.93, 2.68, 5.61, 3.53, 7.84, 3.60, 5.82, 4.41, 5.42)) #> [1] 1.496963 mad(svy$whz) #> [1] 1.156428 mad(svy$whz[!(svy$flag %in% c(2, 3, 6, 7) | svy$oedema == 1)]) #> [1] 1.097124 svy <- read.table(\"flag.ex03.csv\", header = TRUE, sep = \",\") head(svy) #>   psu region state age sex weight height   haz   waz   whz #> 1   1     SE  Abia  12   2    7.4   72.1 -0.74 -1.58 -1.69 #> 2   1     SE  Abia  33   1   13.3   94.2  0.04 -0.33 -0.52 #> 3   1     SE  Abia  44   2   14.1   98.6 -0.41 -0.63 -0.57 #> 4   1     SE  Abia  40   2   15.8   99.3  0.39  0.59  0.55 #> 5   1     SE  Abia  23   2   10.1   83.9 -0.51 -0.90 -0.92 #> 6   1     SE  Abia  24   1   13.9   88.7  0.52  1.18  1.22 svy$regionState <- paste(svy$region, svy$state, sep = \":\") head(svy) #>   psu region state age sex weight height   haz   waz   whz regionState #> 1   1     SE  Abia  12   2    7.4   72.1 -0.74 -1.58 -1.69     SE:Abia #> 2   1     SE  Abia  33   1   13.3   94.2  0.04 -0.33 -0.52     SE:Abia #> 3   1     SE  Abia  44   2   14.1   98.6 -0.41 -0.63 -0.57     SE:Abia #> 4   1     SE  Abia  40   2   15.8   99.3  0.39  0.59  0.55     SE:Abia #> 5   1     SE  Abia  23   2   10.1   83.9 -0.51 -0.90 -0.92     SE:Abia #> 6   1     SE  Abia  24   1   13.9   88.7  0.52  1.18  1.22     SE:Abia table(svy$regionState) #>  #>       NC:Benue NC:FCT (Abuja)        NC:Kogi       NC:Kwara    NC:Nasarawa  #>            386            363            326            392            430  #>       NC:Niger     NC:Plateau     NE:Adamawa      NE:Bauchi       NE:Borno  #>            589            503            410            804            558  #>       
NE:Gombe      NE:Taraba        NE:Yobe      NW:Jigawa      NW:Kaduna  #>            643            421            689            711            536  #>        NW:Kano     NW:Katsina       NW:Kebbi      NW:Sokoto     NW:Zamfara  #>            671            657            728            646            668  #>        SE:Abia     SE:Anambra      SE:Ebonyi       SE:Enugu         SE:Imo  #>            334            390            455            418            371  #>   SS:Akwa-Ibom     SS:Bayelsa SS:Cross River       SS:Delta         SS:Edo  #>            331            330            376            346            480  #>      SS:Rivers       SW:Ekiti       SW:Lagos        SW:Ogun        SW:Ondo  #>            315            376            640            566            426  #>        SW:Osun         SW:Oyo  #>            435            610 by(svy$whz, svy$regionState, mad, na.rm = TRUE) #> svy$regionState: NC:Benue #> [1] 0.941451 #> ------------------------------------------------------------  #> svy$regionState: NC:FCT (Abuja) #> [1] 0.96369 #> ------------------------------------------------------------  #> svy$regionState: NC:Kogi #> [1] 0.993342 #> ------------------------------------------------------------  #> svy$regionState: NC:Kwara #> [1] 0.993342 #> ------------------------------------------------------------  #> svy$regionState: NC:Nasarawa #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: NC:Niger #> [1] 0.978516 #> ------------------------------------------------------------  #> svy$regionState: NC:Plateau #> [1] 1.022994 #> ------------------------------------------------------------  #> svy$regionState: NE:Adamawa #> [1] 1.045233 #> ------------------------------------------------------------  #> svy$regionState: NE:Bauchi #> [1] 1.18608 #> ------------------------------------------------------------  #> svy$regionState: NE:Borno #> [1] 1.030407 #> 
------------------------------------------------------------  #> svy$regionState: NE:Gombe #> [1] 1.082298 #> ------------------------------------------------------------  #> svy$regionState: NE:Taraba #> [1] 1.008168 #> ------------------------------------------------------------  #> svy$regionState: NE:Yobe #> [1] 1.022994 #> ------------------------------------------------------------  #> svy$regionState: NW:Jigawa #> [1] 1.200906 #> ------------------------------------------------------------  #> svy$regionState: NW:Kaduna #> [1] 0.985929 #> ------------------------------------------------------------  #> svy$regionState: NW:Kano #> [1] 1.156428 #> ------------------------------------------------------------  #> svy$regionState: NW:Katsina #> [1] 1.022994 #> ------------------------------------------------------------  #> svy$regionState: NW:Kebbi #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: NW:Sokoto #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: NW:Zamfara #> [1] 1.052646 #> ------------------------------------------------------------  #> svy$regionState: SE:Abia #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SE:Anambra #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: SE:Ebonyi #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SE:Enugu #> [1] 0.919212 #> ------------------------------------------------------------  #> svy$regionState: SE:Imo #> [1] 0.88956 #> ------------------------------------------------------------  #> svy$regionState: SS:Akwa-Ibom #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SS:Bayelsa #> [1] 1.11195 #> ------------------------------------------------------------  #> svy$regionState: SS:Cross River #> [1] 0.971103 #> 
------------------------------------------------------------  #> svy$regionState: SS:Delta #> [1] 0.971103 #> ------------------------------------------------------------  #> svy$regionState: SS:Edo #> [1] 0.971103 #> ------------------------------------------------------------  #> svy$regionState: SS:Rivers #> [1] 1.052646 #> ------------------------------------------------------------  #> svy$regionState: SW:Ekiti #> [1] 1.030407 #> ------------------------------------------------------------  #> svy$regionState: SW:Lagos #> [1] 0.837669 #> ------------------------------------------------------------  #> svy$regionState: SW:Ogun #> [1] 0.911799 #> ------------------------------------------------------------  #> svy$regionState: SW:Ondo #> [1] 0.978516 #> ------------------------------------------------------------  #> svy$regionState: SW:Osun #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SW:Oyo #> [1] 0.956277 mads <- by(svy$whz, svy$regionState, mad, na.rm = TRUE) mads <- round(mads[1:length(mads)], 2) mads #> svy$regionState #>       NC:Benue NC:FCT (Abuja)        NC:Kogi       NC:Kwara    NC:Nasarawa  #>           0.94           0.96           0.99           0.99           0.93  #>       NC:Niger     NC:Plateau     NE:Adamawa      NE:Bauchi       NE:Borno  #>           0.98           1.02           1.05           1.19           1.03  #>       NE:Gombe      NE:Taraba        NE:Yobe      NW:Jigawa      NW:Kaduna  #>           1.08           1.01           1.02           1.20           0.99  #>        NW:Kano     NW:Katsina       NW:Kebbi      NW:Sokoto     NW:Zamfara  #>           1.16           1.02           0.93           0.93           1.05  #>        SE:Abia     SE:Anambra      SE:Ebonyi       SE:Enugu         SE:Imo  #>           0.90           0.93           0.90           0.92           0.89  #>   SS:Akwa-Ibom     SS:Bayelsa SS:Cross River       SS:Delta         SS:Edo  #>           0.90           
1.11           0.97           0.97           0.97  #>      SS:Rivers       SW:Ekiti       SW:Lagos        SW:Ogun        SW:Ondo  #>           1.05           1.03           0.84           0.91           0.98  #>        SW:Osun         SW:Oyo  #>           0.90           0.96 summary(mads) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>  0.8400  0.9300  0.9800  0.9892  1.0300  1.2000 table(mads) #> mads #> 0.84 0.89  0.9 0.91 0.92 0.93 0.94 0.96 0.97 0.98 0.99 1.01 1.02 1.03 1.05 1.08  #>    1    1    4    1    1    4    1    2    3    2    3    1    3    2    3    1  #> 1.11 1.16 1.19  1.2  #>    1    1    1    1 svyFlagged <- national.SMART(x = svy, strata = \"regionState\") svyFlagged <- svyFlagged[!(svyFlagged$flagSMART %in% c(2, 3, 6, 7)), ] mads <- by(svyFlagged$whz, svyFlagged$regionState, mad, na.rm = TRUE) mads <- round(mads[1:length(mads)], 2) mads #> svyFlagged$regionState #>       NC:Benue NC:FCT (Abuja)        NC:Kogi       NC:Kwara    NC:Nasarawa  #>           0.92           0.95           0.99           0.96           0.92  #>       NC:Niger     NC:Plateau     NE:Adamawa      NE:Bauchi       NE:Borno  #>           0.93           1.02           1.02           1.17           1.02  #>       NE:Gombe      NE:Taraba        NE:Yobe      NW:Jigawa      NW:Kaduna  #>           1.06           0.98           0.99           1.17           0.96  #>        NW:Kano     NW:Katsina       NW:Kebbi      NW:Sokoto     NW:Zamfara  #>           1.10           1.01           0.90           0.90           1.02  #>        SE:Abia     SE:Anambra      SE:Ebonyi       SE:Enugu         SE:Imo  #>           0.87           0.91           0.90           0.90           0.87  #>   SS:Akwa-Ibom     SS:Bayelsa SS:Cross River       SS:Delta         SS:Edo  #>           0.87           1.05           0.92           0.95           0.96  #>      SS:Rivers       SW:Ekiti       SW:Lagos        SW:Ogun        SW:Ondo  #>           1.01           1.01           0.85           0.90        
   0.96  #>        SW:Osun         SW:Oyo  #>           0.89           0.95 summary(mads) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>  0.8500  0.9000  0.9600  0.9665  1.0100  1.1700"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"measures-of-dispersion","dir":"Articles","previous_headings":"","what":"Measures of dispersion","title":"Distributions of variables and indices","text":"Measures dispersion summarise cases (e.g. children classified wasted, stunted, underweight) distributed across survey’s primary sampling units (e.g. clusters). retrieve survey dataset: file flag.ex01.csv comma-separated-value (CSV) file containing anthropometric data recent SMART survey Sudan. apply flagging criteria data: exclude flagged records: apply case-definition stunted: can examine distribution stunted cases across primary sampling units survey: need counts cases primary sampling unit:  useful keep later use: interested cases distributed across primary sampling units. three general patterns. random, clumped, uniform. can identify pattern example data likely belongs using index dispersion. simplest index dispersion, one used SMART, variance mean ratio: \\[ \\text{Variance mean ratio} ~ = ~ \\frac{s ^ 2}{\\overline{\\chi}} \\] interpretation variance mean ratio straightforward: Variance mean ratio ≈ 1 Random Variance mean ratio > 1 Clumped (.e. clumped random) Variance mean ratio < 1 Uniform (.e. uniform random) value variance mean ratio can range zero (maximum uniformity) total number cases data (maximum clumping). Maximum uniformity found number cases found every primary sampling unit. Maximum clumping found cases found one primary sampling unit. example data: observed variance mean ratio (0.6393127) suggests distribution cases across primary sampling units completely uniform, neither random. formal (Chi-squared) test can performed. 
Chi-squared test statistic can calculated using: returns: 18.54007 critical values test statistic can found using: returns: 16.04707 45.72229 Chi-squared test statistic 16.04707 conclude pattern cases across primary sampling units example data uniform. case example data. Chi-squared test statistic 45.72229 conclude pattern cases across primary sampling units example data clumped. case example data. Since Chi-squared test statistic falls 16.04707 45.72229 conclude pattern cases across primary sampling units example data random. problems variance mean ratio. clearly non-random patterns can produce variance mean ratios one. variance mean ratio also strongly influenced total number cases present data clumping present. better measure Green’s Index Dispersion: \\[ \\text{Green's Index} ~ = ~  \\frac{ \\left ( \\frac{s ^ 2}{\\overline{\\chi}} \\right ) ~ - ~ 1}{n ~ - ~ 1} \\] Green’s Index corrects variance mean ratio total number cases present data. value Green’s Index can range $ -1 / (n - 1) $ maximum uniformity (specific dataset) one maximum clumping. interpretation Green’s Index straightforward: Green’s Index ≈ 0 Random Green’s Index > 0 Clumped (.e. clumped random) Green’s Index < 0 Uniform (.e. uniform random) sampling distribution Green’s Index well described. NiPN data quality toolkit provides greenIndex() function overcomes problem. R language function uses bootstrap technique estimate Green’s Index test whether distribution cases across primary sampling units random. greenIndex() function requires specify name survey dataset, name variable specifying primary sampling unit, name variable specifying case status. example data: returns: point estimate Green’s Index (-0.0013) zero p-value test random distribution cases across primary sampling units (0.0040) 0.05. distribution cases across primary sampling units example data significantly uniform random. can see graphically using:  dashed line plot marks mean number cases found primary sampling unit. 
uniform distribution show bars ending close line (see figure ). SMART uses variance mean ratio test data quality. Green’s Index robust choice can used compare samples vary overall sample size number sampling units used. idea behind using measure dispersion judge data quality belief distribution cases malnutrition across primary sampling units always random. case data considered suspect. problem approach deviations random can reflect true distribution cases survey area. may occur survey area comprises, example, one livelihood zone. also less likely case conditions, wasting oedema, associated infectious disease may clumped randomly distributed across primary sampling units. may become particular problem proximity sampling used collect within-cluster samples. Measures dispersion problematic used measures data quality interpreted caution. exception rule finding maximum, almost maximum, uniformity maximum, almost maximum, clumping. finding maximum uniformity likely data fabricated. finding maximum clumping may indicate poor data collection / poor data management.","code":"svy <- read.table(\"flag.ex01.csv\", header = TRUE, sep = \",\")  head(svy) #>   psu child age sex weight height muac oedema   haz   waz   whz #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82 svy$flag <- 0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6), svy$flag + 1, svy$flag) svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < -5 | svy$whz > 5), svy$flag + 2, svy$flag) svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < -6 | svy$waz > 5), svy$flag + 4, svy$flag) svy <- svy[svy$flag == 0, ] svy$stunted <- ifelse(svy$haz < -2, 1, 2) 
table(svy$psu, svy$stunted) #>      #>       1  2 #>   1   8 20 #>   2  11 14 #>   3   7 22 #>   4   6 23 #>   5   7 15 #>   6  11 20 #>   7  11 14 #>   8  14 12 #>   9  12 18 #>   10 10  9 #>   11 12 16 #>   12  9 13 #>   13  9 13 #>   14  5 21 #>   15 12  9 #>   16  8 17 #>   17  6 23 #>   18  8 21 #>   19 10 12 #>   20  6 20 #>   21 11 18 #>   22 11 14 #>   23 12  6 #>   24  8 15 #>   25 10 19 #>   26 10  8 #>   27 12  9 #>   28  6 14 #>   29 14 10 #>   30 11 18 table(svy$psu, svy$stunted)[,1] #>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  #>  8 11  7  6  7 11 11 14 12 10 12  9  9  5 12  8  6  8 10  6 11 11 12  8 10 10  #> 27 28 29 30  #> 12  6 14 11 barplot(table(svy$psu, svy$stunted)[,1], xlab = \"PSU\", ylab = \"Cases\", cex.names = 0.5) casesPerPSU <- table(svy$psu, svy$stunted)[,1]  casesPerPSU #>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  #>  8 11  7  6  7 11 11 14 12 10 12  9  9  5 12  8  6  8 10  6 11 11 12  8 10 10  #> 27 28 29 30  #> 12  6 14 11 varianceCasesPerPSU <- var(casesPerPSU) meanCasesPerPSU <- sum(casesPerPSU) / length(casesPerPSU)  V2M <- varianceCasesPerPSU / meanCasesPerPSU V2M #> [1] 0.6393127 sum((casesPerPSU - meanCasesPerPSU)^2) / meanCasesPerPSU #> [1] 18.54007 qchisq(p = c(0.025, 0.975), df = length(casesPerPSU) - 1) qchisq(p = c(0.025, 0.975), df = length(casesPerPSU) - 1) greensIndex(data = svy, psu = \"psu\", case = \"stunted\") #>  #>  Green's Index of Dispersion #>  #> Green's Index (GI) of Dispersion  = -0.0013, 95% CI = (-0.0022, -0.0004) #> Maximum uniformity for this data  = -0.0035 #>                          p-value  =  0.0000 table(svy$psu, svy$stunted)[,1] #>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  #>  8 11  7  6  7 11 11 14 12 10 12  9  9  5 12  8  6  8 10  6 11 11 12  8 10 10  #> 27 28 29 30  #> 12  6 14 11 barplot(table(svy$psu, svy$stunted)[,1], xlab = \"PSU\", ylab = \"Cases\", cex.names = 0.5) abline(h = 
sum(casesPerPSU) / length(casesPerPSU), lty = 2)"},{"path":"https://nutriverse.io/nipnTK/articles/ah.html","id":"summarising-tabulating-and-visualising-age-data","dir":"Articles","previous_headings":"","what":"Summarising, tabulating, and visualising age data","title":"Age heaping","text":"variable interest age (age months): Tables can difficult use ungrouped age data usually many different values: fullTable() function NiPN data-quality toolkit preferred include values zero counts: used fullTable() function returns table containing cells every value specified values parameter. returned table also contain cells values specified values parameter. default values parameter range variable tabulated. means values parameter can sometimes omitted: Omitting values parameter works reliably numeric variables containing whole numbers. variable tabulated character variable numeric variable containing one numbers decimal places specify values parameter. graphical analysis usually informative tabular analysis:  expect ages present roughly equal frequency frequency reducing slowly age due mortality. can see marked age-heaping 12, 18, 24, 30, 36, 48 months (see figure ). common age reported mothers. tendency mothers carers round ages whole years half years. Note used values = 6:59 fullTable() function NiPN data quality toolkit. range values present age variable.","code":"summary(svy$age) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>     6.0    18.0    30.0    30.4    42.0    59.0 table(svy$age) #>  #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 fullTable(svy$age, values = 6:59) #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 fullTable(svy$age) #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 barplot(fullTable(svy$age, values = 6:59),          xlab = \"Age (months)\", ylab = \"Frequency\", las = 3, cex.names = 0.6)"},{"path":"https://nutriverse.io/nipnTK/articles/ah.html","id":"age-heaping-in-children","dir":"Articles","previous_headings":"","what":"Age heaping in children","title":"Age heaping","text":"Age heaping can seriously affect survey results indices include age component (e.g. height- -age weight-age). effect important systematic rounding systematic rounding . Systematic rounding can lead bias. rounding systematically indices biased upwards prevalence biased downwards. rounding systematically indices biased downwards prevalence biased upwards. useful way looking age heaping age recorded months examine remainders ages divided 12. 
R language provides special operator (%%) help :  NiPN data quality toolkit provides R language function called ageHeaping() performs age-heaping analysis. Applying function example data: returns: output ageHeaping() function can saved later use: saved output contains Chi-squared test frequency tables final digits (counts percentages). can accessed using: saved results may also plotted: resulting plot shown .  ageHeaping() function assumes want examine remainder dividing twelve. useful working ages recorded months. may also useful use divisors, examining remainder dividing six:  shows extent age heaping whole half-years (see figure ).","code":"rem <- svy$age %% 12 remTable <- fullTable(rem, values = 0:11) remTable #>   0   1   2   3   4   5   6   7   8   9  10  11  #> 170  33  46  41  46  48 105  63  83  72  90  76 prop.table(remTable) * 100 #>         0         1         2         3         4         5         6         7  #> 19.473081  3.780069  5.269187  4.696449  5.269187  5.498282 12.027491  7.216495  #>         8         9        10        11  #>  9.507446  8.247423 10.309278  8.705613 barplot(remTable, xlab = \"Age (months) %% 12\", ylab = \"Frequency\")  abline(h = sum(remTable / 12), lty = 3) chisq.test(remTable) #>  #>  Chi-squared test for given probabilities #>  #> data:  remTable #> X-squared = 214.96, df = 11, p-value < 2.2e-16 ageHeaping(svy$age) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 ah12 <- ageHeaping(svy$age) ah12 #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 ah12$X2 #> X-squared  #>  214.9588 ah12$df #> df  #> 11 ah12$p #> [1] 5.791598e-40 ah12$tab #> Remainder of svy$age / 12 #>   0   1   2   3   4   5   6   7   8   9  10  11  #> 170  33  46  41  46  48 105  63  83  72  90  76 ah12$pct #> Remainder of svy$age / 12 #>    0    1    2    3    4    5    6    7    8    9   10   11  #> 19.5  3.8 
 5.3  4.7  5.3  5.5 12.0  7.2  9.5  8.2 10.3  8.7 plot(ah12, main = \"Age-heaping (remainder of age / 12)\") ah6 <- ageHeaping(svy$age, divisor = 6)  print(ah6) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 6 #> X-squared = 145.0275, df = 5, p-value = 0.0000 plot(ah6)"},{"path":"https://nutriverse.io/nipnTK/articles/ah.html","id":"age-heaping-in-adults","dir":"Articles","previous_headings":"","what":"Age heaping in adults","title":"Age heaping","text":"Using ten five divisors can useful dealing data adults ages recorded whole years. example: file ah.ex01.csv comma-separated-value (CSV) file containing anthropometric data Rapid Assessment Method Older People (RAM-OP) survey Dadaab refugee camp Garissa, Kenya. survey people aged sixty years older. variable interest age (age years): Care exercised specifying divisor use analysis age heaping. calendars use base ten. Amongst Han Chinese, example, age heaping may occur twelve-year cycle corresponding preferred animal years Chinese calendar. analysis age heaping concentrates specific digits (e.g. zero five) decimal intervals appropriate populations. advisable, therefore use simple tabulation visualisation techniques heap decide appropriate divisor. example data:  shows age-heaping decades half-decades (see figure ). survey using divisor 10 appropriate:  pronounced age heaping decades , lesser extent, half-decades data (see figure ). may also useful use divisors, examining remainder dividing five:  shows extent age heaping whole half decades (see figure ).","code":"svy <- read.table(\"ah.ex01.csv\", header = TRUE, sep = \",\")  head(svy) svy <- ah.ex01  head(svy) summary(svy$age) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>     6.0    18.0    30.0    30.4    42.0    59.0 summary(svy$age) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>     6.0    18.0    30.0    30.4    42.0    59.0 fullTable(svy$age) #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 barplot(fullTable(svy$age),          xlab = \"Age (years)\", ylab = \"Frequency\", las = 3, cex.names = 0.6) ah10 <- ageHeaping(svy$age, divisor = 10)  print(ah10) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 10 #> X-squared = 70.31042, df = 9, p-value = 0.0000 plot(ah10) ah5 <- ageHeaping(svy$age, divisor = 5)  print(ah5) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 5 #> X-squared = 10.39633, df = 4, p-value = 0.0343 plot(ah5)"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"age-and-sex-distributions-childrens-data","dir":"Articles","previous_headings":"","what":"Age and sex distributions (children’s data)","title":"Age and sex distributions","text":"Age heaping tendency report children’s ages nearest year adult ages nearest multiple 5 10 years. Age heaping common. major reason data nutritional anthropometry surveys often analysed reported using broad age-groups. commonest age-groups used children’s data 6 17 months, 18 29 months, 30 41 months, 42 53 months, 54 59 months (see figure ). known year-centred age-groups. Note last age-group covers six months nominally centred five years. age-groups may used specific analyses. techniques presented can adapted work age- groups.  
retrieve survey dataset: dataset dp.ex02 comma-separated-value (CSV) file containing anthropometric data SMART survey Kabul, Afghanistan.","code":"svy <- read.table(\"dp.ex02.csv\", header = TRUE, sep = \",\")  head(svy) #>   psu age sex weight height muac oedema #> 1   1   6   1    7.3   65.0  146      2 #> 2   1  42   2   12.5   89.5  156      2 #> 3   1  23   1   10.6   78.1  149      2 #> 4   1  18   1   12.8   81.5  160      2 #> 5   1  52   1   12.1   87.3  152      2 #> 6   1  36   2   16.9   93.0  190      2"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"tabulation-and-visualisation","dir":"Articles","previous_headings":"Age and sex distributions (children’s data)","what":"Tabulation and visualisation","title":"Age and sex distributions","text":"NiPN data quality toolkit provides R language function called recode() makes easy recode group data. use recode() function group data age variable (age months) year-centred age-groups. tabular analysis can performed: table() function performs cross-tabulation. first variable specified (svy$ycag example) row variable. second variable specified (svy$sex example) column variable.  useful examine row percentages column percentages tables age-group sex. look row percentages: returns: shows approximately equal proportions males females year-centred age-group. specified margin = 1 prop.table() function wanted row percentages. also look column percentages: returns: expect approximately equal proportions children age-groups centred 1, 2, 3, 4 years smaller proportion (.e. half age-groups) age-group centred 5 years. specified margin = 2 prop.table() function wanted column percentages. graphical analysis using population pyramid can useful. 
NiPN data quality toolkit provides R language function called pyramid.plot() plotting population pyramids:  can make informative plot specifying title axis labels:  applying shading:  colours:  expect approximately equal numbers children age-groups centred 1, 2, 3, 4 years smaller number (.e. half number age-groups) age-group centred 5 years. also approximately equal numbers males females. see population pyramid .  pyramid.plot() function uses values grouped age variable y-axis value labels. can assign descriptive text values using recode() function. example:  can also use factor type variable. type variable allows labels specified:  cut() function may also used:  cut() function versatile grouping function. explained detail later section. cex.names parameter pyramid.plot() function allows us change size value labels y-axis. value cex.names magnification factor. Values one make labels larger default. Values one make labels smaller default.","code":"svy$ycag <- recode(svy$age, \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\") head(svy) #>   psu age sex weight height muac oedema ycag #> 1   1   6   1    7.3   65.0  146      2    1 #> 2   1  42   2   12.5   89.5  156      2    4 #> 3   1  23   1   10.6   78.1  149      2    2 #> 4   1  18   1   12.8   81.5  160      2    2 #> 5   1  52   1   12.1   87.3  152      2    4 #> 6   1  36   2   16.9   93.0  190      2    3 table(svy$ycag, svy$sex)  #>     #>       1   2 #>   1 101 106 #>   2 102  96 #>   3 126 115 #>   4  78  82 #>   5  31  36 prop.table(table(svy$ycag, svy$sex)) * 100 #>     #>             1         2 #>   1 11.569301 12.142039 #>   2 11.683849 10.996564 #>   3 14.432990 13.172967 #>   4  8.934708  9.392898 #>   5  3.550974  4.123711 prop.table(table(svy$ycag, svy$sex), margin = 1) * 100 #>     #>            1        2 #>   1 48.79227 51.20773 #>   2 51.51515 48.48485 #>   3 52.28216 47.71784 #>   4 48.75000 51.25000 #>   5 46.26866 53.73134 prop.table(table(svy$ycag, svy$sex), margin = 2) * 100 #>     #>     
        1         2 #>   1 23.059361 24.367816 #>   2 23.287671 22.068966 #>   3 28.767123 26.436782 #>   4 17.808219 18.850575 #>   5  7.077626  8.275862 pyramid.plot(svy$ycag, svy$sex) pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\") pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\",              col = c(\"grey80\", \"white\")) pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\",              col = c(\"lightblue\", \"pink\")) pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\") svy$ageLabel <- recode(svy$age, \"6:29='< 30 months'; 30:hi='30 month or older'\") #> Warning in recode(svy$age, \"6:29='< 30 months'; 30:hi='30 month or older'\"): NAs #> introduced by coercion  pyramid.plot(svy$ageLabel,               svy$sex,               main = \"Distribution of age by sex\",               xlab = \"Frequency (Males | Females)\",               ylab = \"Age-group\") svy$ageLabel <- factor(svy$ycag,                        labels = c(\"6:17\", \"18:29\", \"30:41\", \"42:53\", \"54:59\"))  pyramid.plot(svy$ageLabel,               svy$sex,               main = \"Distribution of age by sex\",               xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\") svy$ageCuts <- cut(svy$age, breaks = c(0, 17, 29, 41, 53, 59))  pyramid.plot(svy$ageCuts,               svy$sex,               main = \"Age-group (months) \",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred 
age-group\",              cex.names = 0.9)"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"simple-testing","dir":"Articles","previous_headings":"","what":"Simple testing","title":"Age and sex distributions","text":"possible perform formal test distribution age-groups sex. simple test : yields: example p-value 0.05 accept null hypothesis significant association age sex. important test tests whether distribution ages similar males females. , however, test whether age structure sample meets expectations. requires test compares observed numbers expected numbers derived external source (e.g. census data) demographic model.","code":"chisq.test(table(svy$ycag, svy$sex)) #>  #>  Pearson's Chi-squared test #>  #> data:  table(svy$ycag, svy$sex) #> X-squared = 1.2675, df = 4, p-value = 0.8669"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"a-model-of-the-expected-age-structure","dir":"Articles","previous_headings":"Simple testing","what":"A model of the expected age structure","title":"Age and sex distributions","text":"simple model-based method calculating expected numbers exponential decay population births deaths balance 1:1 male female sex ratio. model proportion surviving group year can calculated : \\[ p ~ = ~ e ^ {-zt} \\] e base natural logarithm (approximately 2.7183), z mortality rate associated time period, t time. Time (t) starts zero purposes computation. Age can used measure time since birth. use 0 first year-centred age-group, 1 second year-centred age-group, -. rationale us using t <- 0:4 . five year-centred age-groups mortality rate 1 / 10,000 / day, expected proportions surviving year can calculated : yields following survival probabilities: need specify duration (.e. number years) represented age-group: can calculate expected proportions children age-group: gives: can now calculate expected numbers: giving: formal test compare observed numbers expected numbers. 
observed numbers can found using: gives: can useful examine observed expected numbers graphically:  calculate Chi-squared test statistic: \\[ \\chi ^ 2 ~ = ~ \\sum \\frac{(\\text{observed} - \\text{expected}) ^ 2}{\\text{expected}} \\] using: yields Chi-Squared test statistic : can find p-value using: gives: example age distribution significantly different expected numbers calculated using simple demographic model. Note specify degrees freedom (df) Chi-Squared test number age-groups minus one. five age-groups specify df = 4. degrees freedom (df) need specify depend number age-groups use. always number age-groups minus one. , example, ten age-groups need specify df = 9. NiPN data quality toolkit provides R function called ageChildren() performs model- based Chi-Squared test: returns: Note specified five years mortality rate 1 / 10,000 / day using u5mr = 1. Another, appropriate, rate may specified. ageChildren() function calculates year-centred age-groups children aged six fifty-nine months default. standard survey population used SMART many surveys. use year-centred age-groups also standard practice. commands given can, however, adapted use different age-groups. output ageChildren() function can saved later use: saved output contains Chi-squared test results tables observed expected values. can accessed using: saved results may also plotted:  ageChildren() function can applied sex separately. males:  females:  easier way : test statistics interpreted caution. significant test result may, example, due use inappropriate model generate expected numbers. significant result particular test may due : Specifying inappropriate five years mortality rate: particular problem specified five years mortality rate assumed applied five years prior data collected. assumption 1:1 male female sex ratio: particular problem setting sex-selective abortion sex-selective infanticide. model crude. Mortality related age. 
Younger children greater mortality risk older children average five years mortality rate used. sophisticated model used , many settings, data required use model. also noted sample sizes used survey can cause tests yield statistically significant results small differences observed expected numbers.","code":"z <- (1 / 10000) * 365.25   t <- 0:4  p <- exp(-z * t)  p z <- (1 / 10000) * 365.25   t <- 0:4  p <- exp(-z * t)  p #> [1] 1.0000000 0.9641340 0.9295544 0.8962149 0.8640713 d <- c(1, 1, 1, 1, 0.5) ep <- d * p / sum(d * p)   ep #> [1] 0.2368580 0.2283628 0.2201724 0.2122757 0.1023311 expected <- ep * sum(table(svy$ycag)) names(expected) <- 1:5  expected #>         1         2         3         4         5  #> 206.77703 199.36076 192.21049 185.31667  89.33505 observed <- table(svy$ycag)   observed #>  #>   1   2   3   4   5  #> 207 198 241 160  67 par(mfcol = c(1, 2)) barplot(observed, main = \"Observed\", xlab = \"Age group\", ylab = \"Frequency\", ylim = c(0, 250)) barplot(expected, main = \"Expected\", xlab = \"Age group\", ylab = \"Frequency\", ylim = c(0, 250)) X2 <- sum((observed - expected) ^ 2 / expected) pchisq(X2, df = 4, lower.tail = FALSE) #> [1] 0.000259395 ageChildren(svy$age, u5mr = 1) #>  #>  Age Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003 ac <- ageChildren(svy$age, u5mr = 1) ac #>  #>  Age Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003  ac$X2 #> [1] 21.43662  ac$df #> [1] 4  ac$p  #> [1] 0.000259395  ac$observed  #>   1   2   3   4   5  #> 207 198 241 160  67  ac$expected #>         1         2         3         4         5  #> 206.77703 199.36076 192.21049 185.31667  89.33505 plot(ac) acM <- ageChildren(svy$age[svy$sex == 1], u5mr = 1)   acM #>  #>  Age Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032  plot(acM) acF <- ageChildren(svy$age[svy$sex == 2], u5mr = 1)   acF #>  #>  Age Test (Children) #>  #> X-squared = 6.8429, df = 4, p = 0.1444  plot(acF) by(svy$age, svy$sex, ageChildren, u5mr = 1) #> 
svy$sex: 1 #>  #>  Age Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #>  Age Test (Children) #>  #> X-squared = 6.8429, df = 4, p = 0.1444"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"use-of-census-data","dir":"Articles","previous_headings":"","what":"Use of census data","title":"Age and sex distributions","text":"use simple demographic models far ideal. usually better calculate expected proportions census data. useful source census data United States Census Bureau’s International Data Base: https://www.census.gov/data-tools/demo/idb/informationGateway.php population single year age-groups 0, 1, 2, 3, 4 years Afghanistan 2015 : can calculate expected values data: sample size \\(n = 900\\) expected number age-group : expected values can used Chi-squared test illustrated . Census data may also used estimate five years’ mortality rate (U5MR) can used ageChildren() function. model exponential decay population births deaths balance 1:1 male female sex ratio: \\[ p ~ = ~ e ^ {-zt} \\] means can, given age-distribution, estimate mortality fitting model: \\[ \\log_e(n) ~ = ~ \\alpha ~ + ~ \\beta t \\] \\(n\\) count children age-group. absolute value β coefficient point estimate mortality rate (z). Using 2015 population data Afghanistan: gives: value reported t \\(\\beta\\) coefficient (-0.04571). absolute value \\(\\beta\\) coefficient (.e. value without sign) 0.04571. point estimate mortality rate. 
Expressed number deaths / 10,000 persons / day: : can use estimate ageChildren() function:","code":"pop <- c(1148379, 1062635, 1015688, 981288, 950875)  ep <- pop / sum(pop) expected <- ep * 900 expected #> [1] 200.3427 185.3841 177.1939 171.1925 165.8868 t <- 0:4  lm(log(pop) ~ t) #>  #> Call: #> lm(formula = log(pop) ~ t) #>  #> Coefficients: #> (Intercept)            t   #>    13.93601     -0.04571 (0.04571 / 365.25) * 10000 #> [1] 1.251472 ageChildren(svy$age, u5mr = 1.251472) #>  #>  Age Test (Children) #>  #> X-squared = 20.4744, df = 4, p = 0.0004"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"the-age-ratio","dir":"Articles","previous_headings":"Use of census data","what":"The age ratio","title":"Age and sex distributions","text":"much simpler less problematic age-related test survey data quality age ratio test. age ratio defined : \\[ \\text{Age ratio} ~ = ~ \\frac{\\text{number children aged 6 29 months}}{\\text{number children aged 30 59 months}} \\] use recode() function NiPN data quality toolkit create relevant age-groups: observed age ratio : gives: often easier work proportions ratios need calculate proportion younger age-group: gives: can calculate expected value using census data simple demographic model. simplest approach use standard value. SMART surveys often use ratio 0.85:1. need calculate expected proportion younger group. ratio 0.85:1 : gives: observed proportion (0.4639175) expected proportion (0.4594595) similar formal test statistical significance required case. Formal testing can done using Chi-squared test: returns: age ratio example data significantly different expected age ratio. NiPN data quality toolkit provide R function called ageRatioTest() performs age ratio test: returns: ratio parameter ageRatioTest() function allows specify expected age ratio 0.85:1. Note ageRatioTest() function applies test data children aged 6 59 months (ages ignored). 
age ratio test might applied data sexes () sex separately: example data meets expectations regarding age ratio children male female children separately.","code":"svy$ageGroup <- recode(svy$age, \"6:29=1; 30:59=2\") head(svy) #>   psu age sex weight height muac oedema ycag ageLabel ageCuts ageGroup #> 1   1   6   1    7.3   65.0  146      2    1     6:17  (0,17]        1 #> 2   1  42   2   12.5   89.5  156      2    4    42:53 (41,53]        2 #> 3   1  23   1   10.6   78.1  149      2    2    18:29 (17,29]        1 #> 4   1  18   1   12.8   81.5  160      2    2    18:29 (17,29]        1 #> 5   1  52   1   12.1   87.3  152      2    4    42:53 (41,53]        2 #> 6   1  36   2   16.9   93.0  190      2    3    30:41 (29,41]        2 sum(svy$ageGroup == 1) / sum(svy$ageGroup == 2) #> [1] 0.8653846 sum(svy$ageGroup == 1) / sum(table(svy$ageGroup)) #> [1] 0.4639175 p <- 0.85 / (0.85 + 1) #> [1] 0.4594595 prop.test(sum(svy$ageGroup == 1), sum(table(svy$ageGroup)), p = 0.4594595) #>  #>  1-sample proportions test with continuity correction #>  #> data:  sum(svy$ageGroup == 1) out of sum(table(svy$ageGroup)), null probability 0.4594595 #> X-squared = 0.053062, df = 1, p-value = 0.8178 #> alternative hypothesis: true p is not equal to 0.4594595 #> 95 percent confidence interval: #>  0.4304994 0.4976573 #> sample estimates: #>         p  #> 0.4639175 ageRatioTest(svy$age, ratio = 0.85) #>  #>      Age Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8654 #> Observed proportion aged 6 - 29 months = 0.4639 #>  #> X-squared = 0.0531, p = 0.8178 by(svy$age, svy$sex, ageRatioTest, ratio = 0.85) #> svy$sex: 1 #>  #>      Age Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8638 #> Observed proportion aged 6 - 29 
months = 0.4635 #>  #> X-squared = 0.0145, p = 0.9041 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #>      Age Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8670 #> Observed proportion aged 6 - 29 months = 0.4644 #>  #> X-squared = 0.0247, p = 0.8750"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"age-and-sex-distributions-adults-and-general-population-surveys","dir":"Articles","previous_headings":"","what":"Age and sex distributions : Adults and general population surveys","title":"Age and sex distributions","text":"key test survey quality whether survey data represents population terms age sex distribution. can test comparison census data. retrieve example data: data taken household rosters collected part household survey Tanzania. use census data taken Wolfram|Alpha knowledge engine: http://www.wolframalpha.com/input/?=Tanzania+age+distribution Another useful source census data United States Census Bureau’s International Data Base: https://www.census.gov/data-tools/demo/idb/informationGateway.php pyramid plot produced Wolfram|Alpha shown figure .  table produced Wolfram|Alpha downloaded stored CSV file: age-groups expressed using form specified ISO 31-11, international standard applies mathematical symbols. form [,b) expresses interval \\(≤ x < b\\). example, [30,35) used indicate set {30, 31, 32, 33, 34} ages years. form [,b) said closed left open right. reference data (ref) uses five-year age-groups. create age-groups example dataset. first check range ages example data: returns: R language provides function makes easy create ISO 31-11 groupings raw data: Using include.lowest = TRUE tells cut() function include lowest breaks value (zero case). Using right = FALSE tells cut() function use groupings closed left. 
combination parameters creates “closed left” “open right” age-groups used reference (ref) data: tabular analysis age-group sex can produced using: visual inspection useful:  can make easier read:  Note specified ylab = \"\" clear category labels represent age-groups prevent y-axis label obscuring category labels, happens :  possible alter number lines text margins plot, reduce size age-group labels, place y-axis label specific line left margin plot order make clearer plot:  easiest way checking whether survey data represents general population terms age sex distribution compare observed (figure right) expected (figure left) distributions.  general shapes two distributions similar. lumpiness figure right due age heaping adult ages decades half-decades:  formal test age structure can made comparing observed expected numbers. can graphically:  observed expected numbers similar . lumpiness observed numbers due age heaping. See Figure ASA04. Formal testing can performed: gives: warning due small expected numbers (.e. n < 5) older age-groups. R provides robust “Monte Carlo” test: may take seconds compute yields: test results need interpreted caution. sample size (\\(n = 8736\\)) large example. means small differences, may due age heaping, become statistically significant. test considered good evidence age-structure sample differs expected age-structure population. also need examine sex ratio sample. sex ratio test can performed using sexRatioTest() function NiPN data quality toolkit sex ratio observed census data: yields: evidence sex ratio sample differs much expected sex ratio population. techniques outlined section illustrative. many surveys, nutritional anthropometry surveys young children, standardised. survey may sample women child-bearing age. sample may restricted women aged 15 45 years. case age-structure can examined using techniques outlined make sense examine sex ratio. 
Care taken examining data surveys may deliberately oversampled specific age-groups.","code":"svy <- read.table(\"as.ex01.csv\", header = TRUE, sep = \",\")  head(svy) #>   age sex #> 1  44   2 #> 2   1   2 #> 3  15   2 #> 4   7   1 #> 5  14   1 #> 6  14   1 ref <- read.table(\"as.ex02.csv\", header = TRUE, sep = \",\") ref #>         age   Males Females     All #> 1     [0,5) 4043000 3969000 8012000 #> 2    [5,10) 3336000 3284000 6620000 #> 3   [10,15) 2775000 2742000 5517000 #> 4   [15,20) 2386000 2372000 4758000 #> 5   [20,25) 2076000 2073000 4149000 #> 6   [25,30) 1753000 1750000 3503000 #> 7   [30,35) 1453000 1432000 2885000 #> 8   [35,40) 1142000 1099000 2241000 #> 9   [40,45)  873000  846000 1719000 #> 10  [45,50)  673000  699000 1372000 #> 11  [50,55)  538000  601000 1139000 #> 12  [55,60)  433000  503000  936000 #> 13  [60,65)  357000  426000  783000 #> 14  [65,70)  266000  319000  585000 #> 15  [70,75)  182000  222000  404000 #> 16  [75,80)  108000  137000  245000 #> 17  [80,85)   51000   68000  119000 #> 18  [85,90)   17000   25000   42000 #> 19  [90,95)    3000    6000    9000 #> 20 [95,100)       0    1000    1000 range(svy$age) #> [1]  0 93 svy$ageGroup <-cut(svy$age,                     breaks = seq(from = 0, to = 95, by = 5),                    include.lowest = TRUE, right = FALSE) table(svy$ageGroup) #>  #>   [0,5)  [5,10) [10,15) [15,20) [20,25) [25,30) [30,35) [35,40) [40,45) [45,50)  #>    1598    1268    1072     808     870     575     580     385     424     258  #> [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85) [85,90) [90,95]  #>     284     128     165      82      98      51      60      18      12 table(svy$ageGroup, svy$sex) #>           #>             1   2 #>   [0,5)   821 777 #>   [5,10)  637 631 #>   [10,15) 547 525 #>   [15,20) 389 419 #>   [20,25) 342 528 #>   [25,30) 343 232 #>   [30,35) 250 330 #>   [35,40) 177 208 #>   [40,45) 206 218 #>   [45,50) 125 133 #>   [50,55) 162 122 #>   [55,60)  70  58 #>   [60,65)  87  78 #> 
  [65,70)  33  49 #>   [70,75)  47  51 #>   [75,80)  22  29 #>   [80,85)  24  36 #>   [85,90)  10   8 #>   [90,95]   1  11 pyramid.plot(svy$ageGroup, svy$sex) pyramid.plot(svy$ageGroup,               svy$sex,               main = \"Age-group by sex\",              xlab = \"Number (Males | Females)\",               ylab = \"\",               las = 1,               cex.names = 0.9) pyramid.plot(svy$ageGroup,               svy$sex,               main = \"Age-group by sex\",              xlab = \"Number (Males | Females)\",               ylab = \"Age-group\",               las = 1,              cex.names = 0.9) par(mar = c(5, 5, 4, 2))  pyramid.plot(svy$ageGroup,               svy$sex,               main = \"Age-group by sex\",              xlab = \"Number (Males | Females)\",               ylab = \"\",               las = 1,               cex.names = 0.8)  title(ylab = \"Age-group\", line = 4) ah <- ageHeaping(svy$age, divisor = 10)  plot(ah, main = \"Remainder of age / 10\") ref <- ref[1:19, ]  expectedProportions <- ref$All / sum(ref$All) expectedNumbers <- expectedProportions * sum(table(svy$ageGroup))  mp <- barplot(table(svy$ageGroup),                main = \"Observed and expected numbers\",                ylim = c(0, max(expectedNumbers)),                las = 2)  lines(mp, expectedNumbers, lty = 2, lwd = 2) chisq.test(table(svy$ageGroup),             p = expectedProportions) #> Warning in chisq.test(table(svy$ageGroup), p = expectedProportions): Chi-squared #> approximation may be incorrect #>  #>  Chi-squared test for given probabilities #>  #> data:  table(svy$ageGroup) #> X-squared = 248.41, df = 18, p-value < 2.2e-16 chisq.test(table(svy$ageGroup),             p = expectedProportions,             simulate.p.value = TRUE) #>  #>  Chi-squared test for given probabilities with simulated p-value (based #>  on 2000 replicates) #>  #> data:  table(svy$ageGroup) #> X-squared = 248.41, df = NA, p-value = 0.0004998 censusM <- sum(ref$Males) censusF <- 
sum(ref$Females)  sexRatioTest(svy$sex,               codes = c(1, 2),               pop = c(censusM, censusF)) #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.4988 #> Observed proportion male = 0.4914 #> X-squared = 1.8770, p = 0.1707"},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"digit-preference-in-anthropometric-measurements","dir":"Articles","previous_headings":"","what":"Digit preference in anthropometric measurements","title":"Digit preference","text":"Measurements nutritional anthropometry surveys usually taken recorded one decimal place. Examples given table . Common measurements used anthropometric surveys Digit preference observation final number measurement occurs greater frequency expected chance. can occur rounding, practice increasing decreasing value measurement nearest whole half unit, data made . taking recording measurements field common field staff round first value decimal point zero five. Measurements whole numbers may also rounded nearest decade (e.g. 137 mm may rounded 140 mm) half-decade (e.g. 137 mm may rounded 135 mm). small number rounded measurements unlikely affect survey results. large number rounded measurements can affect survey results particularly measurements systematically rounded one direction. form bias. Fictitious data often shows digit preference (e.g.) ”2” “6” appearing final digits much frequently expected. happens , without using computer, large quantity random data much harder fake merely random-looking data. little digit preference anthropometric data expect final recorded digit measurement occur approximately equal frequency. can check digit preference absent data testing whether case. 
use R Language Data Analysis Graphics illustrate can done.","code":""},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"tabulation-and-visualisation","dir":"Articles","previous_headings":"","what":"Tabulation and visualisation","title":"Digit preference","text":"First work artificial data: use set.seed() resets pseudorandom number generator. ensures results shown get follow example analyses. always examine data performing formal tests. table can useful: returns: can look proportions instead counts: returns: prefer working percentages : returns: Examining data graphically useful: can add line showing expectation final digit occur 10% time: resulting plot shown .  tabular graphical analyses consistent little digit preference generated data. analyses agree expectation final digit occur 10% time. seeing random variation. can use formal test confirm : returns: example p-value 0.05 accept null hypothesis digit preference. important check digit zero nine represented tables plots. Missing digits can indicate strong digit preference. NiPN data quality toolkit provides fullTable() function. R language function produces table includes cells zero counts. example remove values final digit equal 6 generated data: see effect:  misleading analysis. easy miss final digits equal 6 data. plot misleading final digit 6 represented assumed ten rather nine final digits calculated expected frequencies. Chi-squared test correct account zero cases final digit equal 6. fullTable() function avoids issues:  Chi-squared test (incorrectly) calculated without zero cell: indicates problem data. chi-square test (correctly) calculated zero cell: indicates problem data. Note use sum(fullTable(finalDigits)) / 10 (.e. divide ten) know ten final digits (.e. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9). issue using hypothesis test chi-squared test. Test values strongly influenced sample size yielding false-negative results used small sample sizes false-positive results used large sample sizes. 
can illustrate generating new artificial data marked digit preference: creates table object containing counts imaginary final digits. Looking data:  marked digit preference zero five (see figure ). Chi-squared test: returns: example Chi-squared test failed detect marked digit preference. false negative test result. failure Chi-squared test example due small number observations (.e. n = 60) used analysis. tabular graphical analysis required identify digit preference problem example. usually working large sample sizes. can bring problem false positives. generate data: data approximate properties set true uniformly random numbers. digit preference might observe data due solely chance. generated data appear exhibit digit preference:  digit preference especially marked. Chi-squared test: yields: suggests significant digit preference. false positive result generated data constrained uniformly random digit preference observed due solely chance. failure Chi-squared test example due test mistaking random variation digit preference , part, due use large (.e. \\(n ~ = ~ 1000\\)) number observations. also important note test p < 0.05 significance threshold generate positive result 1 20 tests data exhibiting nothing random variation. 
tests p < 0.05 significance threshold 5% false positive rate.","code":"set.seed(0) finalDigits <- sample(x = 0:9, size = 1000, replace = TRUE) table(finalDigits) #> finalDigits #>   0   1   2   3   4   5   6   7   8   9  #>  95  80  96 102 106  98 109  95 109 110 prop.table(table(finalDigits)) #> finalDigits #>     0     1     2     3     4     5     6     7     8     9  #> 0.095 0.080 0.096 0.102 0.106 0.098 0.109 0.095 0.109 0.110 prop.table(table(finalDigits)) * 100 #> finalDigits #>    0    1    2    3    4    5    6    7    8    9  #>  9.5  8.0  9.6 10.2 10.6  9.8 10.9  9.5 10.9 11.0 barplot(table(finalDigits), xlab = \"Final digit\", ylab = \"Frequency\") abline(h = sum(table(finalDigits)) / 10, lty = 3) chisq.test(table(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  table(finalDigits) #> X-squared = 7.72, df = 9, p-value = 0.5626 finalDigits[finalDigits == 6] <- NA table(finalDigits) #> finalDigits #>   0   1   2   3   4   5   7   8   9  #>  95  80  96 102 106  98  95 109 110 prop.table(table(finalDigits)) * 100 #> finalDigits #>         0         1         2         3         4         5         7         8  #> 10.662177  8.978676 10.774411 11.447811 11.896745 10.998878 10.662177 12.233446  #>         9  #> 12.345679 barplot(table(finalDigits), xlab = \"Final digit\", ylab = \"Frequency\")  abline(h = sum(table(finalDigits)) / 10, lty = 3) chisq.test(table(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  table(finalDigits) #> X-squared = 6.8889, df = 8, p-value = 0.5487 fullTable(finalDigits) #>   0   1   2   3   4   5   6   7   8   9  #>  95  80  96 102 106  98   0  95 109 110 prop.table(fullTable(finalDigits)) * 100 #>         0         1         2         3         4         5         6         7  #> 10.662177  8.978676 10.774411 11.447811 11.896745 10.998878  0.000000 10.662177  #>         8         9  #> 12.233446 12.345679 barplot(fullTable(finalDigits), xlab = \"Final digit\", ylab = 
\"Frequency\")  abline(h = sum(fullTable(finalDigits)) / 10, lty = 3) chisq.test(fullTable(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  fullTable(finalDigits) #> X-squared = 106.65, df = 9, p-value < 2.2e-16 #>  #>  Chi-squared test for given probabilities #>  #> data:  table(finalDigits) #> X-squared = 6.8889, df = 8, p-value = 0.5487 #>  #>  Chi-squared test for given probabilities #>  #> data:  fullTable(finalDigits) #> X-squared = 106.65, df = 9, p-value < 2.2e-16 finalDigits <- as.table(x = c(11, 7, 5, 4, 7, 11, 5, 4, 4, 2))  names(finalDigits) <- 0:9 finalDigits #>  0  1  2  3  4  5  6  7  8  9  #> 11  7  5  4  7 11  5  4  4  2 prop.table(finalDigits) * 100 #>         0         1         2         3         4         5         6         7  #> 18.333333 11.666667  8.333333  6.666667 11.666667 18.333333  8.333333  6.666667  #>         8         9  #>  6.666667  3.333333 barplot(finalDigits, xlab = \"Final digit\", ylab = \"Frequency\")  abline(h = sum(finalDigits) / 10, lty = 3) chisq.test(finalDigits) #>  #>  Chi-squared test for given probabilities #>  #> data:  finalDigits #> X-squared = 13.667, df = 9, p-value = 0.1347 set.seed(3) finalDigits <- sample(x = 0:9, size = 1000, replace = TRUE) table(finalDigits) #> finalDigits #>   0   1   2   3   4   5   6   7   8   9  #> 102 104  96  88 103 115  91  86 105 110 prop.table(fullTable(finalDigits)) * 100 #>    0    1    2    3    4    5    6    7    8    9  #> 10.2 10.4  9.6  8.8 10.3 11.5  9.1  8.6 10.5 11.0 barplot(fullTable(finalDigits), xlab = \"Final digit\", ylab = \"Frequency\")  abline(h = sum(fullTable(finalDigits)) / 10, lty = 3) chisq.test(fullTable(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  fullTable(finalDigits) #> X-squared = 8.16, df = 9, p-value = 0.5181"},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"avoiding-false-positives-using-the-digit-preference-score","dir":"Articles","previous_headings":"","what":"Avoiding 
false positives using the digit preference score","title":"Digit preference","text":"problem false-positives can addressed using summary measure takes effect sample size account. widely used method digit preference score (DPS). DPS developed MONICA project: http://www.thl.fi/publications/monica/bp/bpqa.htm DPS corrects Chi-squared statistic (\\(\\chi ^ 2\\)) sample size (n) degrees freedom (df) test: \\[ DPS ~ = ~ 100 ~ \\times ~ \\sqrt{\\frac{\\chi ^ 2}{n ~ \\times ~ df}} \\] effect “desensitising” Chi-squared test. DPS can used anthropometric data types surveys may also applied clinical data. low DPS value indicates little digit preference. high DPS value indicates considerable digit preference. Guideline values DPS shown table . Guideline thresholds DPS NiPN data quality toolkit provides R language function digitPreference() calculating DPS. Applying function example data: yields: consistent little digit preference example data. output digitPreference() function can saved later use: saved output contains DPS value frequency tables final digits (counts percentages). can accessed using: saved results may also plotted: resulting plot shown .  now practice using digitPreference() function survey data. start retrieving survey data: file dp.ex01.csv comma-separated-value (CSV) file containing anthropometric data single state DHS survey West African country. first records dataset can seen using: returns: two variables interest wt (weight) ht (height). can examine digit preference variable weight (wt) using: returns: can plot digit preference using: resulting plot shown .  weight data shows digit preference classified “Good” using classifications shown table . can examine digit preference variable height (ht) using:  DPS value (22.77) DPS plot () show considerable digit preference height (ht) variable. classified “Problematic” using classifications shown table . Note specified digits = 1 used digitPreference() function weight height data example DHS data. 
variables measured recorded one decimal place. using digitPreference() function MUAC data measured recorded whole numbers (.e. decimal places) specify digits = 0. example: file dp.ex02.csv comma-separated-value (CSV) file containing anthropometric data SMART survey Kabul, Afghanistan. first records dataset can seen using: returns: variable interest muac (MUAC). variable measured recorded whole millimetres. can examine digit preference MUAC variable using:  DPS value (13.08) DPS plot () show considerable digit preference classified “Acceptable” using classifications shown table .","code":"digitPreference(finalDigits, digits = 0) #>  #>  Digit Preference Score #>  #> data:    finalDigits #> Digit Preference Score (DPS) = 3.01 (Excellent) dpsResults <- digitPreference(finalDigits, digits = 0) dpsResults$dps  #> [1] 3.01 dpsResults$tab  #> finalDigits #>   0   1   2   3   4   5   6   7   8   9  #> 102 104  96  88 103 115  91  86 105 110 dpsResults$pct  #> finalDigits #>    0    1    2    3    4    5    6    7    8    9  #> 10.2 10.4  9.6  8.8 10.3 11.5  9.1  8.6 10.5 11.0 dpsResults$dpsClass #> SMART DPS Class  #>     \"Excellent\" plot(dpsResults, main = \"finalDigit example data\") svy <- read.table(\"dp.ex01.csv\", header = TRUE, sep = \",\") head(svy) #>   psu age sex   wt   ht oedema #> 1 330  14   1  5.0 65.6      2 #> 2 330  54   2 12.1 99.0      2 #> 3 330  25   1  8.9 59.5      2 #> 4 330  52   1 14.6 98.0      2 #> 5 330  43   1 10.1 99.1      2 #> 6 330   7   1  4.0 58.1      2 digitPreference(svy$wt, digits = 1) #>  #>  Digit Preference Score #>  #> data:    svy$wt #> Digit Preference Score (DPS) = 11.86 (Good) plot(digitPreference(svy$wt, digits = 1), main = \"Weight\") digitPreference(svy$ht, digits = 1)  #>  #>  Digit Preference Score #>  #> data:    svy$ht #> Digit Preference Score (DPS) = 22.77 (Problematic) plot(digitPreference(svy$ht, digits = 1), main = \"Height\") svy <- read.table(\"dp.ex02.csv\", header = TRUE, sep = \",\") head(svy) #>   psu age 
sex weight height muac oedema #> 1   1   6   1    7.3   65.0  146      2 #> 2   1  42   2   12.5   89.5  156      2 #> 3   1  23   1   10.6   78.1  149      2 #> 4   1  18   1   12.8   81.5  160      2 #> 5   1  52   1   12.1   87.3  152      2 #> 6   1  36   2   16.9   93.0  190      2 digitPreference(svy$muac, digits = 0)  #>  #>  Digit Preference Score #>  #> data:    svy$muac #> Digit Preference Score (DPS) = 13.08 (Acceptable) plot(digitPreference(svy$muac, digits = 0), main = \"MUAC\")"},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"some-warnings","dir":"Articles","previous_headings":"","what":"Some warnings","title":"Digit preference","text":"material presented assumed data recorded fixed precision (e.g. one decimal place weight height, decimal places MUAC). may case data recorded mixed precision. example, weights younger children may measured using “baby scales” recorded nearest 10 g (.e. two decimal places) weights older children measured using “hanging scales” recorded nearest 100 g (.e. one decimal place). sorts situations can difficult handle automatically since (e.g.) 3.1 3.10 number stored way. easiest approach treat data two separate datasets examining digit preference. Care taken ensure mistake limitations measuring instrument digit preference. example, designs MUAC tape can return measurements even number final digit. case never see MUAC measurements 1, 3, 5, 7, 9 final digit. limitation instrument look like digit preference. digitPreference() function can handle situation. retrieve dataset: file dp.ex03.csv comma-separated-value (CSV) file containing anthropometric data sample children living refugee camp West African country. MUAC measured using “numbers boxes” design MUAC tape:  can even numbers final digit type MUAC tape used. check : returns: even numbers. odd number recording error data-entry error. 
can examine digit preference data using digitPreference() function: returns: misleading digitPreference() function assumes possible final digits (.e. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) present. case example data. can examine using: returns: can use values parameter digitPreference() specify values allowed final digit: returns: DPS moved 33.34 (“Problematic”) 0.78 (“Excellent”). can tabulate plot frequency final digits muac variable:","code":"svy <- read.table(\"dp.ex03.csv\", header = TRUE, sep = \",\")  head(svy) #>   age sex weight height muac oedema #> 1  36   2   12.4   86.9  150      2 #> 2  39   2   10.9   83.5  146      2 #> 3  29   1   11.6   90.6  138      2 #> 4  47   2   14.6   95.5  170      2 #> 5  16   1   10.4   78.9  154      2 #> 6  23   1    8.9   80.8  146      2 table(svy$muac) #>  #> 108 114 118 120 122 124 126 128 130 132 134 136 138 140 142 144 146 148 150 152  #>   1   1   3   3   2   6   5   5  21   8  16  23  20  16  32  26  24  22  16  25  #> 154 156 158 160 162 164 166 168 170 174 176 178  #>  16  14  19   8   7   7   9   3  11   2   2   1 digitPreference(svy$muac, digits = 0) #>  #>  Digit Preference Score #>  #> data:    svy$muac #> Digit Preference Score (DPS) = 33.34 (Problematic) digitPreference(svy$muac, digits = 0)$tab #> svy$muac #>  0  1  2  3  4  5  6  7  8  9  #> 75  0 74  0 74  0 77  0 74  0 digitPreference(svy$muac, digits = 0, values = c(0, 2, 4, 6, 8)) #>  #>  Digit Preference Score #>  #> data:    svy$muac #> Digit Preference Score (DPS) = 0.78 (Excellent) dpsResults <- digitPreference (svy$muac, digits = 0, values = c(0, 2, 4, 6, 8))  dpsResults$tab #> svy$muac #>  0  2  4  6  8  #> 75 74 74 77 74 dpsResults$pct #> svy$muac #>    0    2    4    6    8  #> 20.1 19.8 19.8 20.6 19.8 plot(dpsResults)"},{"path":"https://nutriverse.io/nipnTK/articles/flagging.html","id":"applying-who-flagging-criteria-to-survey-data","dir":"Articles","previous_headings":"","what":"Applying WHO flagging criteria to survey data","title":"Identifying 
outliers using flags","text":"first exercise, apply flagging criteria survey data. retrieve survey dataset: file flag.ex01.csv comma-separated-value (CSV) file containing anthropometric data recent SMART survey Sudan. Applying flagging criteria straightforward. first create column contain flag code set zero (.e. flags) records: apply flagging criteria index. apply flagging criteria HAZ index: can translated “HAZ missing HAZ -6 HAZ +6 add 1 flag variable else leave flag variable unchanged”. careful using \\(<\\) comparison operator negative numbers. Always insert space \\(<\\) \\(–\\) characters. R interprets \\(<-\\) assignment operator may produce unexpected unwanted results without issuing warning error message. apply flagging criteria WHZ index: apply flagging criteria WAZ index: Note time apply flagging criteria increase value flagging variable next power two problem detected: another index use \\(2 ^ 3\\) (.e. 8) flag problem index. advantage using coding scheme compactly codes possible combinations problems single variable (see table ). number flagged records example dataset. : returns: table shows relative frequency detected problems. See table find meaning codes.   Flagging codes based powers two meanings   number flagged records can found using: returns: proportion records flagged can found using: returns: 4.45% records flagged. Note missing values flagged. can useful check missing values see missing component measurements component measurement range calculation index values (e.g. WAZ calculated children aged ten years younger). issue can explored selection listing. example: returns: one missing value whz record 8.due missing value height (shown NA). haz also missing. may possible fix issue missing data available paper forms. Flagging dual role: data-checking tool. access data collection forms often able check records fix data-entry errors data. measure data-quality. Flagged records can indicate problems measurement, recording, data-entry, data-checking. 
proportion flagged records dataset , ideally, 2.5%. SMART guidelines consider proportions 7.5% problematic. found 4.45% records example dataset flagged. data acceptable quality. can use: display flagged records. : produces compact list. example dataset records identified using combination psu child variables. listed records can checked edited (see previous table). Anthropometric indices can recalculated flagging process repeated records can fixed fixed. Records fixed can censored analysis. Records usually censored index--index basis. example, analysis based WHZ censor records flag variable 2, 3, 6, 7. Table shows censoring rules index:   Censoring rules index   careful applying censoring rules. analysis prevalence using WHZ, example, usually include children oedema commonly used case-definition acute malnutrition : \\[ \\text{WHZ} < -2 ~ \\text{bilateral pitting oedema} \\] want use case-definitions include oedema careful exclude children oedema censoring flagged records. analysis using WAZ might want exclude oedema cases.","code":"svy <- read.table(\"flag.ex01.csv\", header = TRUE, sep = \",\") #>   psu child age sex weight height muac oedema   haz   waz   whz #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82 svy$flag <- 0 #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    0 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124   
   2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6), svy$flag + 1, svy$flag) #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    0 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < - 5 | svy$whz > 5), svy$flag + 2, svy$flag) #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < - 6 | svy$waz > 5), svy$flag + 4, svy$flag) #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 We started with zero  Then we added $2 ^ 0$ (i.e. 1) if HAZ was out of range.   Then we added $2 ^ 1$ (i.e. 
2) if WHZ was out of range.   Then we added $2 ^ 2$ (i.e. 4) if WAZ was out of range. table(svy$flag) #>  #>   0   1   2   3   5   6  #> 751   9  12   9   2   3 table(svy$flag != 0)[\"TRUE\"] #> TRUE  #>   35 prop.table(table(svy$flag != 0))[\"TRUE\"] #>       TRUE  #> 0.04452926 svy[is.na(svy$whz), c(\"weight\", \"height\", \"whz\")] #>   weight height whz #> 8    8.1     NA  NA svy[svy$flag != 0, ] #>     psu child age sex weight height muac oedema   haz   waz   whz flag #> 1     1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 29    1    29  24   2   16.3  107.3  155      2  6.69  2.69 -0.82    1 #> 32    2     1  12   1    6.1   99.4  112      2  9.95 -4.02 -9.18    3 #> 35    2     4  24   2    6.8   65.5  128      2 -6.27 -4.30 -0.63    1 #> 88    3    30  24   2   16.9  107.5  158      2  6.75  2.95 -0.47    1 #> 106   4    18  36   1   13.4   65.7  152      2 -8.20 -0.56  7.64    3 #> 174   7     3  36   2    6.8   66.6  134      2 -7.47 -5.35 -1.01    1 #> 198   8     1  27   2    5.5   66.0  112      2 -6.59 -5.92 -3.27    1 #> 280  11     7  24   1    6.7   81.7  140      2 -1.77 -4.86 -5.63    2 #> 286  11    13  48   1    9.4   77.3  146      2 -6.21 -4.25 -0.69    1 #> 292  11    19  12   1   12.9   92.3  152      2  6.97  2.68 -0.50    1 #> 307  12     3  36   1    7.5   90.0  130      2 -1.64 -4.99 -6.42    2 #> 350  14     1  20   1    5.7   77.8  142      2 -2.27 -5.49 -6.47    2 #> 352  14     3  48   1    6.5   80.7  140      2 -5.40 -6.22 -5.74    6 #> 368  14    19  48   1   13.4   66.3  144      2 -8.83 -1.58  7.33    3 #> 399  15    21  36   1   14.3   66.0  154      2 -8.12 -0.02  8.58    3 #> 400  15    22  48   1   14.5   68.0  152      2 -8.42 -0.95  7.80    3 #> 405  16     4  24   2    7.8   65.0  145      2 -6.42 -3.27  1.04    1 #> 406  16     5  12   1    7.8   98.0  138      2  9.36 -1.93 -7.23    3 #> 408  16     7  48   1    8.0   77.0  128      2 -6.28 -5.20 -2.66    1 #> 432  17     3   6   1    7.9   98.4  
138      2 14.38 -0.04 -7.18    3 #> 433  17     4  48   2    8.3   94.9  136      2 -1.82 -4.79 -5.63    2 #> 490  19     1  12   2    5.3   72.0  152      2 -0.78 -4.27 -5.30    2 #> 591  22    24  36   1   14.0   69.0  152      2 -7.31 -0.20  6.77    3 #> 594  23     1  36   1    5.4   80.0  140      2 -4.34 -6.66 -7.27    6 #> 595  23     2  36   1    5.9   72.0  114      2 -6.50 -6.26 -4.96    5 #> 596  23     3  24   1    6.3   77.0  130      2 -3.31 -5.24 -5.38    2 #> 599  23     6  36   1    6.5   80.0  130      2 -4.34 -5.79 -5.61    2 #> 616  23    23  36   1   16.0   74.0  144      2 -5.96  0.90  6.82    2 #> 640  25     1  12   2    6.3   99.3  110      2  9.82 -2.96 -8.25    3 #> 641  25     2  48   2    6.7   85.0  140      2 -4.12 -5.90 -5.83    2 #> 671  26     1  48   1    5.3   95.0  135      2 -1.99 -7.03 -9.71    6 #> 690  26    20  36   1   16.0   79.0  162      2 -4.61  0.90  5.34    2 #> 715  28     4  36   2    7.7  103.0  114      2  2.09 -4.60 -7.31    2 #> 757  30     1  24   1    5.5   68.6  106      2 -6.06 -6.01 -4.76    5 svy[svy$flag != 0, c(\"psu\", \"child\", \"flag\")] #>     psu child flag #> 1     1     1    2 #> 29    1    29    1 #> 32    2     1    3 #> 35    2     4    1 #> 88    3    30    1 #> 106   4    18    3 #> 174   7     3    1 #> 198   8     1    1 #> 280  11     7    2 #> 286  11    13    1 #> 292  11    19    1 #> 307  12     3    2 #> 350  14     1    2 #> 352  14     3    6 #> 368  14    19    3 #> 399  15    21    3 #> 400  15    22    3 #> 405  16     4    1 #> 406  16     5    3 #> 408  16     7    1 #> 432  17     3    3 #> 433  17     4    2 #> 490  19     1    2 #> 591  22    24    3 #> 594  23     1    6 #> 595  23     2    5 #> 596  23     3    2 #> 599  23     6    2 #> 616  23    23    2 #> 640  25     1    3 #> 641  25     2    2 #> 671  26     1    6 #> 690  26    20    2 #> 715  28     4    2 #> 757  30     1    
5"},{"path":"https://nutriverse.io/nipnTK/articles/flagging.html","id":"applying-smart-flagging-criteria-to-survey-data","dir":"Articles","previous_headings":"","what":"Applying SMART flagging criteria to survey data","title":"Identifying outliers using flags","text":"next exercise apply SMART flagging criteria survey dataset. retrieve survey dataset: create column contain flag code set zero (.e. flags) records: Applying SMART flagging criteria requires us first calculate mean index value: use mean value define flagging ranges: index: number flagged records example dataset. : returns: table shows relative frequency detected problems. See previous table find meaning codes. number flagged records can found using: returns: proportion records flagged can found using: returns: 16% records flagged. high proportion records flagged. Note SMART flagging criteria identify considerably records (126 records flagged) flagging criteria (35 records flagged). example SMART flagging criteria flagged 91 biologically plausible records. can list flagged records using: listed records can checked edited (see previous table). Anthropometric indices can recalculated flagging process repeated records can fixed fixed. listing records displaying large tables may see message like : max.print option sets limit length information can displayed single command. 
can alter behaviour using:","code":"svy <- read.table(\"flag.ex01.csv\", header = TRUE, sep = \",\") #>   psu child age sex weight height muac oedema   haz   waz   whz #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82 svy$flag <- 0 meanHAZ <- mean(svy$haz, na.rm = TRUE) svy$flag <- ifelse(!is.na(svy$haz) &                (svy$haz < (meanHAZ - 3) | svy$haz > (meanHAZ + 3)),                svy$flag + 1, svy$flag) meanWHZ <- mean(svy$whz, na.rm = TRUE)  svy$flag <- ifelse(!is.na(svy$whz) &               (svy$whz < (meanWHZ - 3) | svy$whz > (meanWHZ + 3)),               svy$flag + 2, svy$flag)   meanWAZ <- mean(svy$waz, na.rm = TRUE)  svy$flag <- ifelse(!is.na(svy$waz) &               (svy$waz < (meanWAZ - 3) | svy$waz > (meanWAZ + 3)),               svy$flag + 4, svy$flag) table(svy$flag) #>  #>   0   1   2   3   4   5   6   7  #> 660  59  11  16   1  19  16   4 table(svy$flag != 0)[\"TRUE\"] #> TRUE  #>  126 prop.table(table(svy$flag != 0))[\"TRUE\"] #>      TRUE  #> 0.1603053 svy[svy$flag != 0, ] #>     psu child age sex weight height muac oedema   haz   waz   whz flag #> 1     1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 3     1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    1 #> 15    1    15  36   1   12.3   79.7  144      2 -4.42 -1.27  1.97    3 #> 28    1    28  48   2   15.8  109.7  146      2  1.62 -0.12 -1.72    1 #> 29    1    29  24   2   16.3  107.3  155      2  6.69  2.69 -0.82    5 #> 31    1    31  48   2   18.8  109.9  166      2  1.66  1.10  0.13    1 #> 32    2     1  12   1    6.1   99.4  112      2  9.95 -4.02 -9.18    3 #> 34    2     3  24   2    6.5   76.0  108  
    2 -3.01 -4.61 -4.16    6 #> 35    2     4  24   2    6.8   65.5  128      2 -6.27 -4.30 -0.63    1 #> 36    2     5  36   1    7.3   76.0  110      2 -5.42 -5.15 -3.56    5 #> 42    2    11  12   2    9.9   80.0  150      2  2.32  0.82 -0.21    1 #> 44    2    13  36   2   10.5   78.0  142      2 -4.48 -2.24  0.87    1 #> 52    2    21  36   1   12.7   77.5  144      2 -5.01 -1.01  2.77    3 #> 57    2    26  24   1   15.5   93.7  166      2  2.16  2.13  1.46    5 #> 59    3     1  18   2    5.7   67.0  110      2 -4.72 -4.72 -3.21    5 #> 66    3     8  48   2    9.4   79.0  144      2 -5.51 -4.03 -0.57    1 #> 76    3    18  24   2   12.1   96.0  138      2  3.19  0.42 -1.79    1 #> 88    3    30  24   2   16.9  107.5  158      2  6.75  2.95 -0.47    5 #> 89    4     1  26   2    6.6   71.7  114      2 -4.73 -4.74 -2.95    5 #> 106   4    18  36   1   13.4   65.7  152      2 -8.20 -0.56  7.64    3 #> 107   4    19  24   1   13.7   97.6  150      2  3.43  1.05 -0.89    1 #> 122   5     4  24   1    8.0   73.3  130      2 -4.52 -3.61 -1.66    1 #> 125   5     7  36   2   11.3  106.2  150      2  2.93 -1.63 -4.61    3 #> 139   5    21  24   2   15.2   82.0  138      2 -1.15  2.18  3.97    6 #> 154   6    14  24   2   11.9   91.0  148      2  1.64  0.29 -0.91    1 #> 165   6    25  36   1   14.9  108.0  144      2  3.21  0.31 -2.13    1 #> 173   7     2  10   2    6.5   76.2  122      2  1.91 -2.23 -4.20    3 #> 174   7     3  36   2    6.8   66.6  134      2 -7.47 -5.35 -1.01    5 #> 187   7    16  10   2   11.6   84.3  152      2  5.19  2.50  0.54    5 #> 198   8     1  27   2    5.5   66.0  112      2 -6.59 -5.92 -3.27    5 #> 199   8     2  24   2    6.4   75.0  138      2 -3.32 -4.72 -4.10    6 #> 201   8     4  24   1    7.1   70.5  122      2 -5.44 -4.47 -2.31    1 #> 203   8     6  31   1    8.5   72.9  134      2 -5.71 -3.83 -0.79    1 #> 205   8     8  36   2    9.4   78.0  146      2 -4.48 -3.18 -0.35    1 #> 212   8    15  48   1   11.4  102.5  126    
  2 -0.20 -2.88 -4.22    2 #> 254   9    30  42   1   17.9  109.4  164      2  2.41  1.23 -0.26    1 #> 255  10     1  23   1    6.7   71.0  118      2 -5.32 -4.76 -3.23    5 #> 274  11     1  24   2    5.8   71.9  108      2 -4.28 -5.34 -4.40    6 #> 280  11     7  24   1    6.7   81.7  140      2 -1.77 -4.86 -5.63    6 #> 283  11    10  36   1    8.5   78.3  126      2 -4.80 -4.20 -2.19    1 #> 286  11    13  48   1    9.4   77.3  146      2 -6.21 -4.25 -0.69    1 #> 290  11    17  24   2   12.4   99.9  136      2  4.40  0.62 -2.33    1 #> 292  11    19  12   1   12.9   92.3  152      2  6.97  2.68 -0.50    5 #> 301  11    28  24   2   15.1   85.3  140      2 -0.13  2.13  2.94    6 #> 302  11    29  30   1   15.2   82.9  154      2 -2.65  1.13  3.76    2 #> 303  11    30  48   2   15.8   90.5  132      2 -2.84 -0.12  2.29    2 #> 307  12     3  36   1    7.5   90.0  130      2 -1.64 -4.99 -6.42    6 #> 313  12     9  12   1   10.0   81.0  150      2  2.21  0.33 -0.75    1 #> 315  12    11  48   1   10.6   84.0  142      2 -4.61 -3.43 -0.75    1 #> 330  13     3  24   1    7.7   73.0  114      2 -4.62 -3.90 -2.06    1 #> 340  13    13  12   2   11.1   79.0  152      2  1.94  1.72  1.26    5 #> 345  13    18  24   1   13.3   96.1  142      2  2.94  0.79 -0.93    1 #> 350  14     1  20   1    5.7   77.8  142      2 -2.27 -5.49 -6.47    6 #> 352  14     3  48   1    6.5   80.7  140      2 -5.40 -6.22 -5.74    7 #> 366  14    17  24   1   12.7   92.3  185      2  1.70  0.39 -0.70    1 #> 368  14    19  48   1   13.4   66.3  144      2 -8.83 -1.58  7.33    3 #> 379  15     1  12   1    5.1   66.0  106      2 -4.10 -5.24 -4.75    6 #> 395  15    17  24   1   13.1   80.0  144      2 -2.33  0.66  2.62    2 #> 399  15    21  36   1   14.3   66.0  154      2 -8.12 -0.02  8.58    3 #> 400  15    22  48   1   14.5   68.0  152      2 -8.42 -0.95  7.80    3 #> 403  16     2  24   1    7.0   74.0  130      2 -4.29 -4.57 -3.56    4 #> 405  16     4  24   2    7.8   65.0  145      
2 -6.42 -3.27  1.04    1 #> 406  16     5  12   1    7.8   98.0  138      2  9.36 -1.93 -7.23    3 #> 408  16     7  48   1    8.0   77.0  128      2 -6.28 -5.20 -2.66    5 #> 432  17     3   6   1    7.9   98.4  138      2 14.38 -0.04 -7.18    3 #> 433  17     4  48   2    8.3   94.9  136      2 -1.82 -4.79 -5.63    6 #> 435  17     6   9   1    8.8   77.7  136      2  2.55 -0.11 -1.61    1 #> 448  17    19  36   1   13.9  105.0  138      2  2.41 -0.26 -2.34    1 #> 449  17    20  36   2   14.4  107.5  162      2  3.27  0.30 -2.27    1 #> 460  17    31  48   1   18.5   96.2  170      2 -1.70  0.96  3.00    2 #> 462  18     2   7   1    7.6   76.5  146      2  3.38 -0.80 -3.19    1 #> 464  18     4  23   1    8.0   73.4  134      2 -4.52 -3.49 -1.69    1 #> 468  18     8  36   1    9.3   77.6  140      2 -4.99 -3.57 -0.89    1 #> 483  18    23  24   1   15.8  102.5  146      2  5.04  2.29 -0.21    5 #> 489  18    29  48   2   19.2  109.9  164      2  1.66  1.24  0.35    1 #> 490  19     1  12   2    5.3   72.0  152      2 -0.78 -4.27 -5.30    2 #> 499  19    10  48   1   10.0   84.2  140      2 -4.56 -3.84 -1.53    1 #> 508  19    19  24   1   13.7   98.0  180      2  3.56  1.05 -0.97    1 #> 510  19    21  24   1   13.9   92.7  152      2  1.83  1.18  0.35    1 #> 512  19    23  36   2   15.8  101.5  174      2  1.69  1.00  0.09    1 #> 519  20     7  18   1    9.4   69.5  140      2 -4.73 -1.36  1.47    1 #> 528  20    16  24   2   12.5   91.5  146      2  1.79  0.68 -0.46    1 #> 530  20    18  24   2   13.2   91.2  160      2  1.70  1.11  0.22    1 #> 536  20    24  48   2   17.5  109.9  154      2  1.66  0.61 -0.63    1 #> 537  20    25  36   1   18.1  109.3  162      2  3.57  1.90 -0.11    5 #> 557  21    19  24   2   11.4   92.0  138      2  1.95 -0.05 -1.64    1 #> 587  22    20  36   2   12.7   80.4  154      2 -3.85 -0.68  2.38    2 #> 591  22    24  36   1   14.0   69.0  152      2 -7.31 -0.20  6.77    3 #> 594  23     1  36   1    5.4   80.0  140      2 
-4.34 -6.66 -7.27    6 #> 595  23     2  36   1    5.9   72.0  114      2 -6.50 -6.26 -4.96    7 #> 596  23     3  24   1    6.3   77.0  130      2 -3.31 -5.24 -5.38    6 #> 598  23     5  24   2    6.5   71.0  124      2 -4.56 -4.61 -2.93    5 #> 599  23     6  36   1    6.5   80.0  130      2 -4.34 -5.79 -5.61    6 #> 600  23     7  24   2    7.0   70.0  112      2 -4.87 -4.10 -1.75    1 #> 604  23    11  14   1    8.0   66.0  136      2 -4.86 -2.11  0.77    1 #> 607  23    14  36   1    8.3   74.0  138      2 -5.96 -4.36 -1.40    1 #> 612  23    19  48   1   11.5   80.0  144      2 -5.56 -2.81  1.14    1 #> 616  23    23  36   1   16.0   74.0  144      2 -5.96  0.90  6.82    3 #> 621  24     5  24   1    8.4   72.2  140      2 -4.88 -3.22 -0.73    1 #> 633  24    17  24   2   12.9   93.2  152      2  2.32  0.93 -0.46    1 #> 640  25     1  12   2    6.3   99.3  110      2  9.82 -2.96 -8.25    3 #> 641  25     2  48   2    6.7   85.0  140      2 -4.12 -5.90 -5.83    6 #> 649  25    10  36   2    8.6   78.0  134      2 -4.48 -3.85 -1.38    1 #> 661  25    22  24   2   12.4   91.0  140      2  1.64  0.62 -0.44    1 #> 671  26     1  48   1    5.3   95.0  135      2 -1.99 -7.03 -9.71    6 #> 672  26     2  18   2    5.6   67.0  108      2 -4.72 -4.84 -3.41    5 #> 674  26     4  36   1    8.0   76.0  134      2 -5.42 -4.60 -2.40    5 #> 679  26     9  48   1   10.5   82.0  142      2 -5.09 -3.50 -0.38    1 #> 683  26    13  24   1   13.8   75.0  156      2 -3.97  1.11  4.39    2 #> 685  26    15  24   1   14.3   85.0  168      2 -0.69  1.42  2.40    2 #> 689  26    19  36   1   15.8  104.0  148      2  2.14  0.80 -0.54    1 #> 690  26    20  36   1   16.0   79.0  162      2 -4.61  0.90  5.34    3 #> 692  27     2  24   2    7.1   68.2  124      2 -5.43 -4.00 -1.04    1 #> 698  27     8  36   2    8.4   75.4  124      2 -5.16 -4.01 -1.06    1 #> 715  28     4  36   2    7.7  103.0  114      2  2.09 -4.60 -7.31    7 #> 721  28    10  48   1   10.3   82.0  148      2 
-5.09 -3.64 -0.61    1 #> 723  28    12  15   1   11.0   73.0  162      2 -2.43  0.59  2.24    2 #> 733  29     1  16   1    5.9   69.2  112      2 -4.26 -4.85 -4.17    6 #> 734  29     2  17   1    6.1   69.3  114      2 -4.53 -4.75 -3.81    5 #> 745  29    13  24   1   11.0   70.3  114      2 -5.50 -0.87  3.01    3 #> 757  30     1  24   1    5.5   68.6  106      2 -6.06 -6.01 -4.76    7 #> 767  30    11  36   2   10.2   77.5  142      2 -4.61 -2.49  0.66    1 #> 781  30    25  24   2   13.3   91.5  152      2  1.79  1.16  0.24    1 #> 783  30    27  36   1   14.2  102.3  138      2  1.68 -0.08 -1.48    1 #> 784  30    28  36   1   14.6  106.1  154      2  2.70  0.15 -1.97    1 #> 786  30    30  36   2   15.5  101.2  154      2  1.61  0.86 -0.05    1 #> [1] \"[ reached getOption(\\\"max.print\\\") -- omitted 43 rows ]\" options(max.print = 99999)"},{"path":"https://nutriverse.io/nipnTK/articles/flagging.html","id":"flagging-data-from-older-children","dir":"Articles","previous_headings":"","what":"Flagging data from older children","title":"Identifying outliers using flags","text":"process flagging anthropometric indices older children similar used younger children. retrieve survey dataset: file flag.ex02.csv comma-separated-value (CSV) file containing anthropometric data survey children aged 11 year older attending school Ethiopia. variables interest height--age z-score (haz) BMI--age z-score (baz). apply flagging criteria (see previous table) variables: Note usually apply SMART flagging criteria older (.e. > 59 months) children. coding flag variable shown previous table. Flagging codes based powers two meanings : returns: table shows relative frequency detected problems. See previous table find meaning codes. number flagged records can found using: returns: proportion records flagged can found using: returns: 1.3% records flagged. acceptably low proportion records flagged. can list flagged records using: listed records can checked edited (see previous table). 
Anthropometric indices can recalculated flagging process repeated records can fixed fixed.","code":"svy <- read.table(\"flag.ex02.csv\", header = TRUE, sep = \",\") #>   school sex ageMonths weight height   haz   baz #> 1   1112   1       173   25.5  179.0  1.70 -8.19 #> 2   1113   2       145   22.7  164.0  1.79 -6.81 #> 3   1116   1       150   13.5  135.0 -2.40 -8.64 #> 4   1123   1       150   25.3  165.0  1.73 -6.92 #> 5   1404   2       163   19.0  116.5 -6.05 -2.89 #> 6   1501   2       185   27.4  136.6 -3.73 -2.85 svy$flag <- 0  svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6),                svy$flag + 1, svy$flag)  svy$flag <- ifelse(!is.na(svy$baz) & (svy$baz < -5 | svy$baz > 5),                svy$flag + 2, svy$flag) table(svy$flag) #>  #>   0   1   2  #> 960   2  11 table(svy$flag != 0)[\"TRUE\"] #> TRUE  #>   13 prop.table(table(svy$flag != 0))[\"TRUE\"] #>       TRUE  #> 0.01336074 svy[svy$flag != 0, ] #>     school sex ageMonths weight height   haz   baz flag #> 1     1112   1       173   25.5  179.0  1.70 -8.19    2 #> 2     1113   2       145   22.7  164.0  1.79 -6.81    2 #> 3     1116   1       150   13.5  135.0 -2.40 -8.64    2 #> 4     1123   1       150   25.3  165.0  1.73 -6.92    2 #> 5     1404   2       163   19.0  116.5 -6.05 -2.89    1 #> 23    1501   2       137   24.7  155.0  1.09 -5.20    2 #> 190   1507   1       173   24.0  154.0 -1.52 -6.46    2 #> 328   1511   1       138   26.9  165.5  2.82 -6.29    2 #> 969   1705   1       185   27.4  150.4 -2.62 -5.06    2 #> 970   1708   1       197   23.9  126.2 -6.19 -3.17    1 #> 971   1708   1       185   23.6  140.7 -3.86 -5.21    2 #> 972   1909   2       174   26.5  153.7 -1.04 -5.04    2 #> 973   2001   1       139   20.7  143.1 -0.49 -6.02    2"},{"path":"https://nutriverse.io/nipnTK/articles/nipnTK.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"The NiPN data quality toolkit","text":"document presents set practical 
analytical methods can applied variables datasets assess quality. index data quality describes scores quality data also presented. focus toolkit data required assess anthropometric status measurements weight, height length, MUAC, sex age. focus anthropometric status many presented methods applied variables. NiPN may commission additional toolkits examine variables types variables. Data quality assessed : Range checks value checks identify univariate outliers. Scatterplots statistical methods identify bivariate outliers. Use flags identify outliers anthropometric indices. Examining distribution statistics distribution measurements anthropometric indices. Assessing extent digit preference recorded measurements. Assessing extent age heaping recorded ages. Examining sex ratio. Examining age distributions age sex distributions. activities proposed order performed shown figure . NiPN data quality workflow material intended provide practical “hands ” introduction assessing data quality presented series computer-based exercises. Example datasets provided. Extensive use made R language environment statistical computing. free powerful data analysis system. Methods described sufficient detail allow activities performed using data analysis systems. R provides extensive language working data. material presented written using small subset R language. Many data quality activities supported R functions written specifically purpose. simplify assessment quality data related anthropometry anthropometric indices. basic R functions, purpose written functions, filenames example datasets also shown figure . purpose written functions described detail .","code":""},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"checking-quantitative-data","dir":"Articles","previous_headings":"","what":"Checking quantitative data","title":"Checking ranges and legal values","text":"use dataset rl.ex01 included nipnTK package. rl.ex01 dataset contains anthropometry data SMART survey Angola. 
can use summary() function examine range (summary statistics) quantitative variable: returns: graphical examination can also made:  “whiskers” boxplot extend 1.5 times interquartile range ends box (.e., lower upper quartiles). known inner fence. Data points outside inner fence considered mild outliers. NiPN data quality toolkit provides R language function outliersUV() uses method identify outliers: returns: can count number outliers use: returns: can express proportion: returns: may find easier use percentages: returns: muac values identified potential outliers possible muac values: outliersUV() function provides fence parameter alters threshold data point considered outlier. default fence = 1.5 defines inner fence (.e 1.5 times interquartile range lower quartile upper quartile). identify mild severe outliers. value fence = 3 defines outer fence (.e 3 times interquartile range lower quartile upper quartile). identify severe outliers : returns: something wrong values muac. intention muac variable records mid-upper-arm-circumference (MUAC) mm. impossibly small (.e. 11.1, 12.4, 13.2) impossibly large values (.e. 999.0). three impossibly small values probably due data recorded cm rather mm. probably safe change three values 111, 124 132. easiest record separately: alternative approach specify row numbers instead values: three 999.0 values missing values coded 999.0. safe set three values missing using special NA value: Range checks repeated editing data ensure problems fixed: Following boxplot muac variable made using: fixes incorrectly entered data missing values made.  
now severe outliers: returns: usually better identify edit extreme univariate outliers, done , use scatterplot statistical distance methods described elsewhere toolkit identify potential outliers.","code":"svy <- rl.ex01 head(svy) #>   age sex weight height muac oedema #> 1  12   2    6.7   68.5  148      2 #> 2   6   1    6.4   65.0  125      2 #> 3   6   2    6.5   65.6  125      2 #> 4   8   1    7.2   68.4  144      2 #> 5  12   M    6.1   65.4  114      2 #> 6   8   1    7.7   66.5  146      2 summary(svy$muac) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    11.1   128.0   139.0   140.3   148.0   999.0 boxplot(svy$muac, horizontal = TRUE, xlab = \"MUAC (mm)\", frame.plot = FALSE) svy[outliersUV(svy$muac), ] #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>     age sex weight height  muac oedema #> 33   24   1    9.8   74.5 180.0      2 #> 93   12   2    6.7   67.0  96.0      1 #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 194  24   M    7.0   75.0  95.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 253  35   2    7.6   75.6  97.0      2 #> 381  24   1   10.8   82.8  12.4      2 #> 501  36   2   15.5   93.4 185.0      2 #> 594  21   2    9.8   76.5  13.2      2 #> 714  59   2   18.9   98.5 180.0      2 #> 752  48   2   15.6  102.2 999.0      2 #> 756  59   1   19.4  101.1 180.0      2 #> 873  59   1   20.6  109.4 179.0      2 table(outliersUV(svy$muac)) #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #> FALSE  TRUE  #>   892    14 prop.table(table(outliersUV(svy$muac))) #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #>      FALSE       TRUE  #> 0.98454746 0.01545254 prop.table(table(outliersUV(svy$muac))) * 100 #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #>     FALSE      TRUE  #> 98.454746  1.545254 #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>     age sex weight height  muac oedema #> 33   24   1  
  9.8   74.5 180.0      2 #> 93   12   2    6.7   67.0  96.0      1 #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 194  24   M    7.0   75.0  95.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 253  35   2    7.6   75.6  97.0      2 #> 381  24   1   10.8   82.8  12.4      2 #> 501  36   2   15.5   93.4 185.0      2 #> 594  21   2    9.8   76.5  13.2      2 #> 714  59   2   18.9   98.5 180.0      2 #> 752  48   2   15.6  102.2 999.0      2 #> 756  59   1   19.4  101.1 180.0      2 #> 873  59   1   20.6  109.4 179.0      2 svy[outliersUV(svy$muac, fence = 3), ] #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 #>     age sex weight height  muac oedema #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 381  24   1   10.8   82.8  12.4      2 #> 594  21   2    9.8   76.5  13.2      2 #> 752  48   2   15.6  102.2 999.0      2 svy$muac[svy$muac == 11.1] <- 111 svy$muac[381] <- 124 svy$muac[594] <- 132 svy$muac[svy$muac == 999.00] <- NA summary(svy$muac) svy[outliersUV(svy$muac), ] svy[outliersUV(svy$muac, fence = 3), ] boxplot(svy$muac, horizontal = TRUE, xlab = \"MUAC (mm)\", frame.plot = FALSE) prop.table(table(outliersUV(svy$muac, fence = 3))) * 100 #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 #>  #> FALSE  #>   100"},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"editing-data","dir":"Articles","previous_headings":"","what":"Editing data","title":"Checking ranges and legal values","text":"edited records outliers R command line. good idea edit data command line using script containing required commands. script provides record changes made data. R also keeps record whatever command line “history file”. history file plain text file usually called .Rhistory stored home directory. regulatory authorities require keep history file. publications may require provide “reproducible data analysis”. 
edited annotated copy history file. edit() function provides basic tool editing data interactively. Editing data using edit() function typically three stage process: Create new object containing data requires editing. Use edit() function edit data new object closing data editor window finished. Replace old records edited records. try using separate copy example data: can check edits made using: fixed problems data return: edit() function works differently different operating systems different graphical user interfaces. using RStudio RAnalyticFlow OS X need install XQuartz want use edit() function. XQuartz available : https://www.xquartz.org/index.html","code":"x <- rl.ex01 records2update <- x[outliersUV(x$muac, fence = 3), ] records2update <- edit(records2update) x[row.names(records2update), ] <- records2update #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 x[outliersUV(x$muac, fence = 3), ] #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 #>     age sex weight height  muac oedema #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 381  24   1   10.8   82.8  12.4      2 #> 594  21   2    9.8   76.5  13.2      2 #> 752  48   2   15.6  102.2 999.0      2"},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"checking-categorical-variables","dir":"Articles","previous_headings":"","what":"Checking categorical variables","title":"Checking ranges and legal values","text":"can use table() function examine codes used categorical variables. example: returns: intention sex variable coded using 1 male 2 female small number records codes M male F female used. mixed coding scheme like complicate data-management data-analysis. Data sex variable edited ensure consistent coding used: may find records contain meaningless codes. code 3 example dataset , probably, meaning likely simple data entry error. record checked corrected, possible. 
record corrected sex variable set missing: Legal value checks repeated editing ensure problems fixed: now returns: table contains cells values M, F, 3 R imported variable categorical “factor” variable: returns: can fix redefining levels sex variable:","code":"table(svy$sex) #>  #>   1   2   3   F   M  #> 404 458   1  24  19 svy$sex[svy$sex == \"M\"] <- 1 svy$sex[svy$sex == \"F\"] <- 2 svy$sex[svy$sex == 3] <- NA table(svy$sex) #>  #>   1   2   3   F   M  #> 423 482   0   0   0 str(svy) #> 'data.frame':    906 obs. of  6 variables: #>  $ age   : int  12 6 6 8 12 8 18 9 12 12 ... #>  $ sex   : Factor w/ 5 levels \"1\",\"2\",\"3\",\"F\",..: 2 1 2 1 1 1 1 1 2 1 ... #>  $ weight: num  6.7 6.4 6.5 7.2 6.1 7.7 6.4 7.8 7.5 6.5 ... #>  $ height: num  68.5 65 65.6 68.4 65.4 66.5 66.7 65.3 69.1 70.3 ... #>  $ muac  : num  148 125 125 144 114 146 119 140 138 121 ... #>  $ oedema: int  2 2 2 2 2 2 2 2 2 2 ... levels(svy$sex) <- c(\"1\", \"2\", NA, NA, NA) table(svy$sex) #>  #>   1   2  #> 423 482"},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"saving-changes","dir":"Articles","previous_headings":"","what":"Saving changes","title":"Checking ranges and legal values","text":"edited data. usually want save changes. simple save dataset comma-separated-value (CSV) text file using write.table() function: R can work variety files format usually simplest work simple text files.","code":"write.table(x = svy, file = \"rl.ex01.clean.csv\", sep = \",\", quote = FALSE,              row.names = FALSE, fileEncoding = \"ASCII\")"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"identifying-outliers-by-observation","dir":"Articles","previous_headings":"","what":"Identifying outliers by observation","title":"Using scatterplots to identify outliers","text":"use dataset sp.ex01: dataset sp.ex01 contains anthropometric data SMART survey Democratic Republic Congo. look relationship height weight dataset: resulting plot shown .  
clear positive linear relationship height weight (.e. weight increases increasing height along straight line). can assess strength relationship using Pearson correlation coefficient: returns: close one, indicates perfect positive association. , however points lie outside bulk plotted points. outliers may due errors data. presence oedema can associated increased weight. particular issue severe oedema. outlier high value weight given height due oedema. can check : pch = ifelse(svy$oedema == 1, 19, 1) tells plot() function plot filled circles oedema cases open circles children without oedema. resulting plot shown .  single high weight height outlier appears due presence oedema. filled circles located main mass plotted points show children oedema may body weight within normal range height. children may wasted suffering form severe acute malnutrition (SAM) known kwashiorkor. Outliers can identified eye. identify() function can help : Clicking point cause record (row) number associated point displayed plot (shown ).  Right-clicking plot pressing “escape” key stop identify(). behaviour identify() function may different use alternative user interface R RStudio RAnalyticFlow. identify() function , default, display record (row) numbers identified points. usually needed. Alternative labels can displayed. example: displays height weight values selected points. ability display custom labels useful variable (column) dataset contains unique record identifiers. useful able store record (row) numbers identified points: points shown previous figure clicked identify : return: can examine data identified points: returns: oedema data coded 1 present 2 absent. Data can checked edited needed. Note record 6 oedema case probably left alone. 
dataset many variables (columns) may specify variables (columns) interest: returns:","code":"svy <- sp.ex01 head(svy) #>   age sex weight height muac oedema #> 1  54   1   20.5  111.5  180      2 #> 2  53   1   19.3  108.0  167      2 #> 3  51   2   19.3  106.0  163      2 #> 4  44   1   18.9  111.0  163      2 #> 5  47   1   18.8  103.0  173      2 #> 6  48   2   18.6   95.3  171      1 plot(svy$height, svy$weight) cor(svy$height, svy$weight, method = \"pearson\", use = \"complete.obs\") #> [1] 0.9204116 plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1)) plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1)) identify(svy$height, svy$weight) plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1))  identify(svy$height, svy$weight,           labels = paste(svy$height, svy$weight, sep = \";\"),           cex = 0.75) plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1))  stored <- identify(svy$height, svy$weight) stored #> [1] \"1\"  \"6\"  \"16\" \"62\" \"66\" svy[stored, ] #>    age sex weight height muac oedema #> 1   54   1   20.5  111.5  180      2 #> 6   48   2   18.6   95.3  171      1 #> 16  30   1   16.9   92.5  188      2 #> 62  55   1   15.1  118.0  156      2 #> 66  56   1   15.0  115.0  148      2 svy[stored, c(\"weight\", \"height\", \"oedema\")] #>    weight height oedema #> 1    20.5  111.5      2 #> 6    18.6   95.3      1 #> 16   16.9   92.5      2 #> 62   15.1  118.0      2 #> 66   15.0  115.0      2"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"identifying-outliers-using-statistical-distance","dir":"Articles","previous_headings":"","what":"Identifying outliers using statistical distance","title":"Using scatterplots to identify outliers","text":"formal method identifying outliers use measure statistical distance. common measure statistical distance applied scatterplot data Mahalanobis distance. treats bivariate probability distribution ellipsoid. 
Mahalanobis distance distance point centre mass distribution divided width ellipsoid direction point:  directions ellipsoid short axis test point must close centre mass distribution. directions ellipsoid long axis test point may distant centre mass distribution. NiPN data quality toolkit provides R language function outliersMD() uses Mahalanobis distance identify outliers dataset: returns set records identified eye: Data can checked edited needed. Note record 6 oedema case probably left alone. can use outliersMD() identify display outliers scatterplot:  outliersMD() function alpha parameter. default value alpha parameter alpha = 0.001. value used automatically unless another value specified. use alpha = 0.001 looking records values extreme expect find probability 0.001 problems data. can calculate number outliers expect see chance alpha = 0.001 using: returns: found five potential outliers. difference number expected number observed (.e. one expected vs. five observed) suggests identified outliers true outliers due data errors. Another way looking alpha parameter alters sensitivity outlierMD() function detecting outliers altering threshold distance used define outliers. can useful using outlierMD() function , , curvilinear relationships (see ). Larger values alpha tend detect potential outliers. example:  : almost cases default alpha = 0.001 appropriate. techniques outlined can used examine relationships pairs anthropometric variables (e.g. weight muac) identify outliers. 
sensible pairings variables examined.","code":"svy[outliersMD(svy$height, svy$weight), ] #>    age sex weight height muac oedema #> 1   54   1   20.5  111.5  180      2 #> 6   48   2   18.6   95.3  171      1 #> 16  30   1   16.9   92.5  188      2 #> 62  55   1   15.1  118.0  156      2 #> 66  56   1   15.0  115.0  148      2 plot(svy$height, svy$weight, pch = ifelse(outliersMD(svy$height, svy$weight), 19, 1)) round(nrow(svy) * 0.001) #> [1] 1 plot(svy$height, svy$weight,      pch = ifelse(outliersMD(svy$height, svy$weight, alpha = 0.01), 19, 1)) svy[outliersMD(svy$height,svy$weight, alpha = 0.01), ] #>     age sex weight height muac oedema #> 1    54   1   20.5  111.5  180      2 #> 2    53   1   19.3  108.0  167      2 #> 3    51   2   19.3  106.0  163      2 #> 4    44   1   18.9  111.0  163      2 #> 5    47   1   18.8  103.0  173      2 #> 6    48   2   18.6   95.3  171      1 #> 16   30   1   16.9   92.5  188      2 #> 32   43   1   16.2   92.6  166      2 #> 61   26   1   15.1   87.6  168      2 #> 62   55   1   15.1  118.0  156      2 #> 66   56   1   15.0  115.0  148      2 #> 477  38   2   10.3   94.6  160      2 #> 487  32   2   10.2   93.0  150      2 #> 722  17   2    8.6   63.3  136      2"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"anthropometric-measurements-and-age","dir":"Articles","previous_headings":"","what":"Anthropometric measurements and age","title":"Using scatterplots to identify outliers","text":"also expect anthropometric variables associated age. relationship particularly strong children. less strong adults may weak even reversed older people. can explore relationship anthropometric variable age using techniques described . example:  problems approach. Age often reported recorded considerable age heaping. Age unlikely approximately normally distributed, assumption Mahalanobis distance method. relationship anthropometric variables age usually follows “growth curve” rather straight line. 
combination age heaping, non-normality, curvilinear relationship may reduce effectiveness Mahalanobis distance method detecting outliers. may useful, cases, increase value alpha parameter. example:  Outliers can listed using value alpha: Mahalanobis distance method usually robust enough deal age data provided appropriate value alpha used.","code":"plot(svy$age, svy$height, pch = ifelse(outliersMD(svy$age, svy$height), 19, 1))  svy[outliersMD(svy$age, svy$height), ] #>    age sex weight height muac oedema #> 4   44   1   18.9    111  163      2 #> 62  55   1   15.1    118  156      2 plot(svy$age, svy$height, pch = ifelse(outliersMD(svy$age, svy$height, alpha = 0.025), 19, 1)) svy[outliersMD(svy$age, svy$height, alpha = 0.025), ] #>     age sex weight height muac oedema #> 1    54   1   20.5  111.5  180      2 #> 4    44   1   18.9  111.0  163      2 #> 7    55   1   18.6  109.3  156      2 #> 14   48   1   17.0  109.0  175      2 #> 27   56   2   16.4  110.0  149      2 #> 62   55   1   15.1  118.0  156      2 #> 66   56   1   15.0  115.0  148      2 #> 113  58   1   14.2   92.0  148      2 #> 129  23   2   14.0   95.0  161      2 #> 190  15   2   13.0   90.5  150      2 #> 212  21   2   12.8   93.0  152      2 #> 378  51   2   11.2   83.0  141      2 #> 453  49   1   10.6   83.0  139      2 #> 461  54   2   10.5   86.8  132      2 #> 551  41   2    9.8   78.0  139      2 #> 599  50   1    9.5   84.7  123      2 #> 660  49   1    9.1   79.5  129      2 #> 722  17   2    8.6   63.3  136      2 #> 809  41   2    7.9   75.7  120      2 #> 881  30   1    6.5   69.6  103      2 #> 893  18   2    5.8   63.2  106      2"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"difficult-relationships-for-the-mahalanobis-distance-method","dir":"Articles","previous_headings":"","what":"Difficult relationships for the Mahalanobis distance method","title":"Using scatterplots to identify outliers","text":"Mahalanobis distance method works well pairs variables long 
relationship two variables monotonic (.e. one variables always increases always decreases value variable increases value). usually case anthropometric data. explore use Mahalanobis distance method data monotonic using generated data:  clear relationship x y monotonic relationship (.e. always increasing decreasing). single obvious outlier. Mahalanobis distance method work well data. :  fails detect outlier. Relaxing alpha parameter: help. Relaxing alpha parameter :  results false positive results fails identify clear outlier. Although Mahalanobis distance used directly identify outliers non-monotonic relationships, can applied residuals fitted non-linear models. technique unlikely required anthropometric data covered toolkit. unlikely see non-monotonic relationships anthropometric data. likely see “growth curves” look like :  monotonic relationship. Mahalanobis distance method work well data. add clear outlier:  can detected using Mahalanobis distance method using slightly relaxed alpha value:","code":"x <- c(4, 8, 16, 17, 22, 27, 38, 40, 47, 48, 53, 55, 63, 71, 76, 85, 92, 96)  y <- c(6, 22, 34, 42, 51, 59, 64, 69, 70, 20, 70, 63, 63, 55, 46, 33, 19, 6) plot(x, y) plot(x, y, pch = ifelse(outliersMD(x, y), 19, 1)) plot(x, y, pch = ifelse(outliersMD(x, y, alpha = 0.025), 19, 1)) plot(x, y, pch = ifelse(outliersMD(x, y, alpha = 0.1), 19, 1)) set.seed(0) x <- 0:100 y <- 1 - exp(-x / 50) + rnorm(101, 0, 0.05)  plot(x, y) lines(x, 1 - exp(-x / 50), lty = 2) y[50] <- 0.3 plot(x, y) plot(x, y, pch = ifelse(outliersMD(x, y, alpha = 0.005), 19, 1))"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"working-with-data-from-older-children","dir":"Articles","previous_headings":"","what":"Working with data from older children","title":"Using scatterplots to identify outliers","text":"now look using scatterplots Mahalanobis distance methods data older children. use sp.ex02 dataset: dataset sp.ex02 contains anthropometric data survey school-age (.e. 
5 15 years) children Pakistan. can summarise dataset using: returns: baz variable contains BMI--age z-score calculated ageMonths, sex, weight, height variables using growth reference. key thing notice summary large number missing values waz variable. weight--age z-score calculated children aged older 120 months. can check using: gives: appears nothing odd large number missing values waz variable. investigate missing values baz variable: returns: data required calculate BMI--age z-score present. Given extreme values waz variable likely BMI--age z-scores records calculated, found outside upper lower flagging criteria, value baz set missing. check recalculate BMI--age z-scores. can use scatterplots examine relationship ageMonths, weight, height:    relationships simple younger children: Variability weight appears increase increasing ageMonths. relationship height ageMonths may entirely linear. relationship weight height clearly non-linear. relationships monotonic still able use Mahalanobis distance method identify outliers:    may want experiment different values alpha parameter outliersMD() function described . Records containing values identified outliers can listed: records can checked, edited (required), anthropometric indices recalculated.","code":"svy <- sp.ex02 head(svy) #>   region school ageMonths sex weight height   haz   waz   baz #> 1      1      1        64   1   13.9   97.7 -3.12 -2.56 -0.56 #> 2      1      1        72   1   21.1  118.7  0.56  0.21 -0.25 #> 3      1      1        75   2   15.6  103.7 -2.47 -2.02 -0.53 #> 4      1      1        75   2   16.0  102.7 -2.66 -1.82 -0.08 #> 5      1      1        75   2   17.5  108.7 -1.51 -1.16 -0.31 #> 6      1      1        79   1   15.0  101.0 -3.57 -2.99 -0.53 summary(svy) #>      region          school        ageMonths          sex        #>  Min.   :1.000   Min.   : 1.00   Min.   : 60.0   Min.   
:1.000   #>  1st Qu.:3.000   1st Qu.: 8.00   1st Qu.: 83.0   1st Qu.:1.000   #>  Median :4.000   Median :15.00   Median : 98.0   Median :1.000   #>  Mean   :4.491   Mean   :15.51   Mean   :104.8   Mean   :1.397   #>  3rd Qu.:7.000   3rd Qu.:23.00   3rd Qu.:124.0   3rd Qu.:2.000   #>  Max.   :8.000   Max.   :30.00   Max.   :178.0   Max.   :2.000   #>                                                                  #>      weight          height           haz              waz         #>  Min.   :10.30   Min.   : 86.2   Min.   :-5.730   Min.   :-5.350   #>  1st Qu.:17.20   1st Qu.:108.7   1st Qu.:-2.640   1st Qu.:-2.380   #>  Median :21.30   Median :120.9   Median :-1.790   Median :-1.615   #>  Mean   :22.62   Mean   :121.2   Mean   :-1.705   Mean   :-1.581   #>  3rd Qu.:27.00   3rd Qu.:132.6   3rd Qu.:-0.790   3rd Qu.:-0.805   #>  Max.   :51.90   Max.   :164.2   Max.   : 3.550   Max.   : 3.010   #>                                                   NA's   :267      #>       baz          #>  Min.   :-4.7000   #>  1st Qu.:-1.2900   #>  Median :-0.7600   #>  Mean   :-0.7758   #>  3rd Qu.:-0.2100   #>  Max.   : 1.9900   #>  NA's   :8 by(svy$ageMonths, is.na(svy$waz), summary) #> is.na(svy$waz): FALSE #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>   60.00   76.00   88.00   88.24   99.00  120.00  #> ------------------------------------------------------------  #> is.na(svy$waz): TRUE #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>   121.0   125.5   141.0   140.8   151.0   178.0 svy[is.na(svy$baz), ] #>     region school ageMonths sex weight height   haz   waz baz #> 83       1      3       143   2   14.0  125.9 -3.64    NA  NA #> 158      2      6        96   1   12.3  118.4 -1.57 -5.26  NA #> 275      3     10        77   1   10.3  113.9 -0.88 -5.35  NA #> 415      4     15        75   1   33.0  108.3 -1.90  3.01  NA #> 508      5     19        85   2   11.1  111.5 -1.78 -4.84  NA #> 529      6     20        78   1   12.1  111.9 -1.37 -4.45  NA #> 761      8     28        62   1   13.3  115.4  0.99 -2.70  NA #> 806      8     29       100   1   13.2  121.2 -1.36 -5.01  NA plot(svy$ageMonths, svy$weight) plot(svy$ageMonths, svy$height) plot(svy$height, svy$weight) plot(svy$ageMonths, svy$weight,      pch = ifelse(outliersMD(svy$ageMonths, svy$weight), 19, 1)) plot(svy$ageMonths, svy$height,      pch = ifelse(outliersMD(svy$ageMonths, svy$height), 19, 1)) plot(svy$height, svy$weight,      pch = ifelse(outliersMD(svy$height, svy$weight), 19, 1)) svy[outliersMD(svy$ageMonths, svy$weight), ]  #>     region school ageMonths sex weight height   haz  waz   baz #> 57       1      2       161   1   47.0  158.7 -0.05   NA  0.05 #> 83       1      3       143   2   14.0  125.9 -3.64   NA    NA #> 139      2      5       123   2   46.5  144.9  0.64   NA  1.82 #> 319      3     11       143   1   45.2  156.0  1.06   NA  0.50 #> 407      4     14       132   1   46.2  155.3  1.73   NA  0.97 #> 415      4     15        75   1   33.0  108.3 -1.90 3.01    NA #> 672      7     24       175   1   50.5  163.5 -0.42   NA -0.25 #> 727      7     26       147   1   46.1  162.7  1.67   NA -0.14 #> 731      7     26       173   1   51.9  164.2 -0.21   NA -0.03 svy[outliersMD(svy$ageMonths, svy$height), ]  #>     region school ageMonths sex weight height  haz  waz   baz #> 457      5     17       110   1   37.5    155 3.55 1.62 -0.32 svy[outliersMD(svy$weight, svy$height), ] #>     region school ageMonths sex 
weight height   haz   waz   baz #> 57       1      2       161   1   47.0  158.7 -0.05    NA  0.05 #> 83       1      3       143   2   14.0  125.9 -3.64    NA    NA #> 139      2      5       123   2   46.5  144.9  0.64    NA  1.82 #> 275      3     10        77   1   10.3  113.9 -0.88 -5.35    NA #> 319      3     11       143   1   45.2  156.0  1.06    NA  0.50 #> 322      3     11       155   2   39.0  135.5 -3.01    NA  0.84 #> 369      4     13       118   2   35.5  129.7 -1.32  0.66  1.66 #> 407      4     14       132   1   46.2  155.3  1.73    NA  0.97 #> 415      4     15        75   1   33.0  108.3 -1.90  3.01    NA #> 438      4     15       163   2   39.0  138.3 -2.92    NA  0.41 #> 611      6     22       148   1   41.9  146.5 -0.66    NA  0.75 #> 672      7     24       175   1   50.5  163.5 -0.42    NA -0.25 #> 731      7     26       173   1   51.9  164.2 -0.21    NA -0.03 #> 806      8     29       100   1   13.2  121.2 -1.36 -5.01    NA"},{"path":"https://nutriverse.io/nipnTK/articles/sr.html","id":"analysis-by-age","dir":"Articles","previous_headings":"","what":"Analysis by age","title":"Sex ratio","text":"sex ratio test may performed age group separately. can apply sex ratio test age-group using () function: Note variable ycag created holds year-centred-age-group. approach assumes sex ratio independent age. approach make assumption use numbers male female children age-ranges population taken census data. useful source census data United States Census Bureau’s International Data Base: https://www.census.gov/data-tools/demo/idb/informationGateway.php source gives following estimates Afghanistan 2016: need ensure use age-ranges census: test sex ratio age group separately: tests find significant differences observed expected sex ratios. 
noted () tests might based small sample sizes: may, therefore, able detect large differences.","code":"svy$ycag <- recode(svy$age, \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\")  by(svy$sex, svy$ycag, sexRatioTest, codes = c(1, 2), pop = c(2.658, 2.508)) #> svy$ycag: 1 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.4879 #> X-squared = 0.4845, p = 0.4864 #>  #> ------------------------------------------------------------  #> svy$ycag: 2 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.5152 #> X-squared = 0.0000, p = 1.0000 #>  #> ------------------------------------------------------------  #> svy$ycag: 3 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.5228 #> X-squared = 0.0374, p = 0.8466 #>  #> ------------------------------------------------------------  #> svy$ycag: 4 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.4875 #> X-squared = 0.3657, p = 0.5454 #>  #> ------------------------------------------------------------  #> svy$ycag: 5 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.4627 #> X-squared = 0.5280, p = 0.4674 svy$ageGroup <- recode(svy$age, \"0:11=0; 12:23=1; 24:35=2; 36:47=3; 48:59=4\") sexRatioTest(svy$sex[svy$ageGroup == 0], pop = c(594602, 573956)) sexRatioTest(svy$sex[svy$ageGroup == 1], pop = c(550593, 533579)) sexRatioTest(svy$sex[svy$ageGroup == 2], pop = c(526827, 510479)) sexRatioTest(svy$sex[svy$ageGroup == 3], pop = c(509048, 493185)) sexRatioTest(svy$sex[svy$ageGroup == 4], pop = c(493521, 478137)) #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5088 #> Observed proportion male = 0.5047 #> X-squared = 0.0000, p = 1.0000 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5078 #> Observed proportion male = 0.4901 #> X-squared = 0.1885, p = 0.6642 #>  #>  Sex Ratio Test #>  #> 
Expected proportion male = 0.5079 #> Observed proportion male = 0.5374 #> X-squared = 0.6800, p = 0.4096 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5079 #> Observed proportion male = 0.5052 #> X-squared = 0.0000, p = 0.9978 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5079 #> Observed proportion male = 0.4552 #> X-squared = 1.4098, p = 0.2351 table(svy$ageGroup) #>  #>   0   1   2   3   4  #> 107 202 227 192 145"},{"path":"https://nutriverse.io/nipnTK/articles/sr.html","id":"sex-ratios-in-adults","dir":"Articles","previous_headings":"","what":"Sex ratios in adults","title":"Sex ratio","text":"data children usually expect something like one one male female sex ratio. usually case adults, especially older adults. retrieve survey dataset: dataset ah.ex01 comma-separated-value (CSV) file containing anthropometry data Rapid Assessment Method Older People (RAM-OP) survey Dadaab refugee camps Garissa, Kenya. survey older people, defined people aged sixty years older. type survey usually possible use camp administration data find expected male female sex ratio. information given RAM-OP survey report. camp population predominantly Somali. reported 188 thousand men 220 thousand women aged sixty years older Somalia (2010 estimates). sex ratio : : expected proportion population male : : proportion sample male: : looks much smaller expected proportion. sex ratio test: reports: proportion males sample significantly smaller expected. result due extraordinary nature population (e.g. camp population really many older women older men). also due selection bias survey. example, men likely women away home day household sample taken day systematically excluded active members male population. Note sex ratio test applies population surveys. surveys focus (e.g.) carers small children observed male female sex ratio likely strongly biased towards women. 
cases sensible apply sex ratio test.","code":"svy <- read.table(\"ah.ex01.csv\", header = TRUE, sep = \",\")  head(svy) #>   psu camp block age sex weight height demispan muac oedema #> 1   1  IFO   A01  90   1   40.8  159.3     77.2 20.0      2 #> 2   1  IFO   A01  60   2   69.8  155.3     78.3 35.3      2 #> 3   1  IFO   A01  63   2   51.7  156.8     80.5 25.5      2 #> 4   1  IFO   A01  74   2   61.1  158.9     83.5 27.0      2 #> 5   1  IFO   A01  65   2   55.1  156.9     85.5 24.5      2 #> 6   1  IFO   A01  62   2   56.7  158.1     86.3 26.1      2 188 / 220 #> [1] 0.8545455 188 / (188 + 220) #> [1] 0.4607843 prop.table(table(svy$sex)) #>  #>        1        2  #> 0.381113 0.618887 sexRatioTest(svy$sex, codes = c(1, 2), pop = c(188, 220)) #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.4608 #> Observed proportion male = 0.3811 #> X-squared = 14.8305, p = 0.0001"},{"path":"https://nutriverse.io/nipnTK/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Mark Myatt. Author. Ernest Guevarra. Author, maintainer.","code":""},{"path":"https://nutriverse.io/nipnTK/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Mark Myatt Ernest Guevarra (2023). 
nipnTK: National Information Platforms Nutrition (NiPN) Data Quality Toolkit R package version 0.1.1.9000 URL https://nutriverse.io/nipnTK/ DOI 10.5281/zenodo.4297897","code":"@Manual{,   title = {nipnTK: National Information Platforms for Nutrition (NiPN) Data Quality Toolkit},   author = {{Mark Myatt} and {Ernest Guevarra}},   year = {2023},   note = {R package version 0.1.1.9000},   url = {https://nutriverse.io/nipnTK/},   doi = {10.5281/zenodo.4297897}, }"},{"path":"https://nutriverse.io/nipnTK/index.html","id":"nipntk-national-information-platforms-for-nutrition-nipn-data-quality-toolkit-","dir":"","previous_headings":"","what":"National Information Platforms for Nutrition Anthropometric Data Toolkit","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"National Information Platforms Nutrition (NiPN) initiative European Commission provide support countries strengthen information systems nutrition improve analysis data better inform strategic decisions faced prevent malnutrition consequences. part mandate, NiPN commissioned work development toolkit assess quality various nutrition-specific nutrition-related data. companion R package toolkit practical analytical methods can applied variables datasets assess quality. focus toolkit data required assess anthropometric status measurements weight, height length, MUAC, sex age. focus anthropometric status many presented methods applied types data. NiPN may commission additional toolkits examine variables types variables.","code":""},{"path":"https://nutriverse.io/nipnTK/index.html","id":"requirements","dir":"","previous_headings":"","what":"Requirements","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"R version 3.4 higher Extensive use made R language environment statistical computing. free powerful data analysis system. R provides extensive language working data. companion package written using small subset R language. 
Many data quality activities described toolkit supported R functions included package written specifically purpose. simplify assessment quality data related anthropometry anthropometric indices.","code":""},{"path":"https://nutriverse.io/nipnTK/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"can install nipnTK CRAN: can install development version nipnTK GitHub :","code":"install.packages(\"nipnTK\") if(!require(remotes)) install.packages(\"remotes\") remotes::install_github(\"nutriverse/nipnTK\")"},{"path":"https://nutriverse.io/nipnTK/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"Data quality assessed : Range checks value checks identify univariate outliers - guide Scatterplots statistical methods identify bivariate outliers - guide Use flags identify outliers anthropometric indices - guide Examining distribution statistics distribution measurements anthropometric indices - guide Assessing extent digit preference recorded measurements - guide Assessing extent age heaping recorded ages - guide Examining sex ratio - guide Examining age distributions age sex distributions - guide activities proposed order performed shown :","code":""},{"path":"https://nutriverse.io/nipnTK/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"find nipnTK package useful, please cite using suggested citation provided call citation function follows:","code":"citation(\"nipnTK\") #>  #> To cite nipnTK in publications use: #>  #>   Mark Myatt and Ernest Guevarra (2023). 
nipnTK: National Information #>   Platforms for Nutrition (NiPN) Data Quality Toolkit R package version #>   0.1.1.9000 URL https://nutriverse.io/nipnTK/ DOI #>   10.5281/zenodo.4297897 #>  #> A BibTeX entry for LaTeX users is #>  #>   @Manual{, #>     title = {nipnTK: National Information Platforms for Nutrition (NiPN) Data Quality Toolkit}, #>     author = {{Mark Myatt} and {Ernest Guevarra}}, #>     year = {2023}, #>     note = {R package version 0.1.1.9000}, #>     url = {https://nutriverse.io/nipnTK/}, #>     doi = {10.5281/zenodo.4297897}, #>   }"},{"path":"https://nutriverse.io/nipnTK/index.html","id":"community-guidelines","dir":"","previous_headings":"","what":"Community guidelines","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"Feedback, bug reports feature requests welcome; file issues seek support . like contribute package, please see contributing guidelines. project released Contributor Code Conduct. participating project agree abide terms.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":null,"dir":"Reference","previous_headings":"","what":"Goodness of fit to an expected (model-based) age distribution — ageChildren","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"Goodness fit expected (model-based) age distribution","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"","code":"ageChildren(   age,   u5mr = 0,   groups = \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\" )"},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"age Vector ages u5mr five years 
mortality rate deaths / 10,000 persons / day groups Age groupings specified recodes parameter bbw::recode() function; default \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\"","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"list class \"ageChildren\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"","code":"# Chi-Squared test for age of children in dp.ex02 sample dataset using an # u5mr of 1 / 10,000 / day. svy <- dp.ex02 ac <- ageChildren(svy$age, u5mr = 1) ac #>  #> \tAge Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003 #>   # Apply function to each sex separately # Males acM <- ageChildren(svy$age[svy$sex == 1], u5mr = 1) acM #>  #> \tAge Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032 #>  # Females acF <- ageChildren(svy$age[svy$sex == 2], u5mr = 1)  # Simplified call to function by sex by(svy$age, svy$sex, ageChildren, u5mr = 1) #> svy$sex: 1 #>  #> \tAge Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #> \tAge Test (Children) #>  #> X-squared = 6.8429, df = 4, p = 0.1444 #>"},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":null,"dir":"Reference","previous_headings":"","what":"Age-heaping analysis — ageHeaping","title":"Age-heaping analysis — ageHeaping","text":"Age heaping tendency report children's ages nearest year adults’ ages nearest multiple five ten years. Age heaping common. 
major reason data nutritional anthropometry surveys often analysed reported using broad age groups.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Age-heaping analysis — ageHeaping","text":"","code":"ageHeaping(x, divisor = 12)"},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Age-heaping analysis — ageHeaping","text":"x Vector ages divisor Divisor (usually 5, 6, 10, 12); default 12","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Age-heaping analysis — ageHeaping","text":"list class \"ageHeaping\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Age-heaping analysis — ageHeaping","text":"","code":"# Test for age heaping using SMART survey data in Kabul, Afghanistan (dp.ex02) # using a divisor of 12 svy <- dp.ex02 ah12 <- ageHeaping(svy$age) ah12 #>  #> \tAge-heaping Analysis #>  #> data:\tRemainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 #>   # Test for age heaping using SMART survey data in Kabul, Afthanistan (dp.ex02) # using a divisor of 6 ah6 <- ageHeaping(svy$age, divisor = 6) ah6 #>  #> \tAge-heaping Analysis #>  #> data:\tRemainder of svy$age / 6 #> X-squared = 145.0275, df = 5, p-value = 0.0000 #>"},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"Age ratio test — ageRatioTest","title":"Age ratio test — ageRatioTest","text":"Age Ratio Test age-related test survey data 
quality.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Age ratio test — ageRatioTest","text":"","code":"ageRatioTest(x, ratio = 0.85)"},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Age ratio test — ageRatioTest","text":"x Numeric vector (age) ratio Expected age ratio","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Age ratio test — ageRatioTest","text":"lit class \"ageRatioTest\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Age ratio test — ageRatioTest","text":"","code":"# Age-ratio test on survey dataset from Kabul, Afghanistan (dp.ex02) # with an age ratio of 0.85 svy <- dp.ex02 ageRatioTest(svy$age, ratio = 0.85) #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8654 #> Observed proportion aged 6 - 29 months = 0.4639 #>  #> X-squared = 0.0531, p = 0.8178 #>   # The age ratio test applied to data for each sex separately by(svy$age, svy$sex, ageRatioTest, ratio = 0.85) #> svy$sex: 1 #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8638 #> Observed proportion aged 6 - 29 months = 0.4635 #>  #> X-squared = 0.0145, p = 0.9041 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected 
proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8670 #> Observed proportion aged 6 - 29 months = 0.4644 #>  #> X-squared = 0.0247, p = 0.8750 #>"},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for age heaping function — ah.ex01","title":"Example dataset for age heaping function — ah.ex01","text":"Anthropometric data Rapid Assessment Method Older People (RAM-OP) survey Dadaab refugee camp Garissa, Kenya. survey people aged sixty years older.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for age heaping function — ah.ex01","text":"","code":"ah.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for age heaping function — ah.ex01","text":"data frame 593 observations 10 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example dataset for age heaping function — ah.ex01","text":"Data courtesy HelpAge International","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for age and sex distributions function — as.ex01","title":"Example dataset for age and sex distributions function — as.ex01","text":"Data taken household rosters collected part household survey Tanzania.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for age and sex distributions function — 
as.ex01","text":"","code":"as.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/as.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for age and sex distributions function — as.ex01","text":"data frame 8736 observations 2 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for age and sex distributions function — as.ex02","title":"Example dataset for age and sex distributions function — as.ex02","text":"Census data Tanzania taken Wolfram|Alpha knowledge engine.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for age and sex distributions function — as.ex02","text":"","code":"as.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for age and sex distributions function — as.ex02","text":"data frame 20 observations 4 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example dataset for age and sex distributions function — as.ex02","text":"http://www.wolframalpha.com/input/?=Tanzania+age+distribution","code":""},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot text in a coloured bounding box. — boxText","title":"Plot text in a coloured bounding box. — boxText","text":"Plot text coloured bounding box.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot text in a coloured bounding box. 
— boxText","text":"","code":"boxText(   x,   y,   labels,   cex = 0.75,   col = \"white\",   border = FALSE,   lwd = 0.5,   pad = TRUE )"},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot text in a coloured bounding box. — boxText","text":"x, y Co-ordinates text plotted labels Text plotted cex Character expansion col Background colour border Border colour lwd Border width pad Add padding (L) (R) ends bounding box","code":""},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot text in a coloured bounding box. — boxText","text":"","code":"## Use of boxtext in the ageHeaping plot function svy <- dp.ex02 ah12 <- ageHeaping(svy$age)  plot.new() boxText(x = as.numeric(names(ah12$tab)),         y = max(ah12$tab) * 0.1,         labels = paste(sprintf(fmt = \"%3.1f\", ah12$pct), \"%\", sep = \"\"),         cex = 0.5,         pad = TRUE)"},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":null,"dir":"Reference","previous_headings":"","what":"Digit preference test — digitPreference","title":"Digit preference test — digitPreference","text":"Digit preference observation final number measurement occurs greater frequency expected chance. can occur rounding, practice increasing decreasing value measurement nearest whole half unit, data made . 
digitPreference() function assesses level digit preference exists given dataset using digit preference score (DPS).","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Digit preference test — digitPreference","text":"","code":"digitPreference(x, digits = 1, values = 0:9)"},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Digit preference test — digitPreference","text":"x Numeric vector digits Number decimal places x. using digits = 1 (e.g.) allows 105 treated 105.0 values vector possible values final digit (default = 0:9)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Digit preference test — digitPreference","text":"list class \"digitPreference\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Digit preference test — digitPreference","text":"DPS definition : Kari Kuulasmaa K, Hense HW, Tolonen H (MONICA Project), Quality Assessment Data Blood Pressure MONICA Project, MONICA Project e-publications . 
9, , Geneva, May 1998 available https://www.thl.fi/publications/monica/bp/bpqa.htm","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Digit preference test — digitPreference","text":"","code":"# Digit preference test applied to anthropometric data from a single state # from a DHS survey in a West African country svy <- dp.ex01 digitPreference(svy$wt, digits = 1) #>  #> \tDigit Preference Score #>  #> data:\tsvy$wt #> Digit Preference Score (DPS) = 11.86 (Good) #>"},{"path":"https://nutriverse.io/nipnTK/reference/dist.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for distributions of variables and indices — dist.ex01","title":"Example dataset for distributions of variables and indices — dist.ex01","text":"Anthropometric data SMART survey Kabul, Afghanistan.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dist.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for distributions of variables and indices — dist.ex01","text":"","code":"dist.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/dist.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for distributions of variables and indices — dist.ex01","text":"data frame 873 observations 11 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for digit preference function — dp.ex01","title":"Example dataset for digit preference function — dp.ex01","text":"Anthropometric data single state Demographic Health Survey (DHS) West African country.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for digit preference 
function — dp.ex01","text":"","code":"dp.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for digit preference function — dp.ex01","text":"data frame 796 observations 6 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for digit preference function — dp.ex02","title":"Example dataset for digit preference function — dp.ex02","text":"Anthropometric data SMART survey Kabul, Afghanistan comma-separated-value (CSV) file format. survey children aged 6-59 months old.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for digit preference function — dp.ex02","text":"","code":"dp.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for digit preference function — dp.ex02","text":"data frame 873 observations 7 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex03.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for digit preference — dp.ex03","title":"Example dataset for digit preference — dp.ex03","text":"Anthropometric data sample children living refugee camp West African country.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex03.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for digit preference — dp.ex03","text":"","code":"dp.ex03"},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex03.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for digit preference — dp.ex03","text":"data frame 374 observations 6 
variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for identifying outliers using flags — flag.ex01","title":"Example dataset for identifying outliers using flags — flag.ex01","text":"Anthropometric data SMART survey Sudan.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for identifying outliers using flags — flag.ex01","text":"","code":"flag.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for identifying outliers using flags — flag.ex01","text":"data frame 786 observations 11 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for identifying outliers using flags — flag.ex02","title":"Example dataset for identifying outliers using flags — flag.ex02","text":"Anthropometric data survey children 11 years older attending school Ethiopia.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for identifying outliers using flags — flag.ex02","text":"","code":"flag.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for identifying outliers using flags — flag.ex02","text":"data.frame 973 observations 7 variables.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex03.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for identifying outliers using flags — flag.ex03","title":"Example dataset for identifying outliers using flags — 
flag.ex03","text":"Anthropometric data national survey Nigeria.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex03.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for identifying outliers using flags — flag.ex03","text":"","code":"flag.ex03"},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex03.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for identifying outliers using flags — flag.ex03","text":"data frame 18330 observations 10 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":null,"dir":"Reference","previous_headings":"","what":"Fill out a one-dimensional table to include a specified range of values — fullTable","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"Fill one-dimensional table include specified range values","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"","code":"fullTable(x, values = min(x, na.rm = TRUE):max(x, na.rm = TRUE))"},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"x vector tabulate values vector values included table. 
Default : min(x, na.rm = TRUE):max(x, na.rm = TRUE)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"table object including zero cells","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"","code":"# Generate some artificial data and then apply `fullTable()` set.seed(0) finalDigits <- sample(x = 0:9, size = 1000, replace = TRUE) fullTable(finalDigits) #>   0   1   2   3   4   5   6   7   8   9  #>  95  80  96 102 106  98 109  95 109 110"},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":null,"dir":"Reference","previous_headings":"","what":"Green's Index of Dispersion — greensIndex","title":"Green's Index of Dispersion — greensIndex","text":"Implementation Green's Index Dispersion bootstrap. sampling distribution Green's Index well described hence bootstrapping used test whether distribution cases across primary sampling units random.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Green's Index of Dispersion — greensIndex","text":"","code":"greensIndex(data, psu, case, replicates = 999)"},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Green's Index of Dispersion — greensIndex","text":"data Survey dataset (R data.frame) psu Name variable holding PSU (cluster) data character vector length = 1 (e.g. psu) case Name variable holding case status character vector length = 1 (e.g. GAM). 
function assumes case status coded 1 = case replicates Number bootstrap replicates (default 9999)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Green's Index of Dispersion — greensIndex","text":"list class GI names:","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Green's Index of Dispersion — greensIndex","text":"value Green's Index can range -1/(n - 1) maximum uniformity (specific dataset) one maximum clumping. interpretation Green’s Index straightforward:","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Green's Index of Dispersion — greensIndex","text":"","code":"# Apply Green's Index using anthropometric data from a SMART survey in Sudan # (flag.ex01) svy <- flag.ex01 svy$flag <- 0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6),                    svy$flag + 1, svy$flag) svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < -5 | svy$whz > 5),                    svy$flag + 2, svy$flag) svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < -6 | svy$waz > 5),                    svy$flag + 4, svy$flag) svy <- svy[svy$flag == 0, ] svy$stunted <- ifelse(svy$haz < -2, 1, 2)  ## set seed to 0 to replicate results set.seed(0) greensIndex(data = svy, psu = \"psu\", case = \"stunted\") #>  #> \tGreen's Index of Dispersion #>  #> Green's Index (GI) of Dispersion  = -0.0014, 95% CI = (-0.0021, -0.0005) #> Maximum uniformity for this data  = -0.0035 #>                          p-value  =  0.0030 #>"},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":null,"dir":"Reference","previous_headings":"","what":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — 
histNormal","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"Histogram normal curve superimposed help “-eye” assessments normality distribution","code":""},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"","code":"histNormal(   x,   xlab = deparse(substitute(x)),   ylab = \"Frequency\",   main = deparse(substitute(x)),   breaks = \"Sturges\",   ylim = NULL )"},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"x numeric vector xlab x-axis label ylab y-axis label main Plot title breaks Passed hist() function (?hist details) ylim y-axis limits","code":""},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"","code":"# histNormal() with data from a SMART survey in Kabul, Afghanistan # (dist.ex01) svy <- dist.ex01 histNormal(svy$muac)  histNormal(svy$haz)  histNormal(svy$waz)  histNormal(svy$whz)"},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":null,"dir":"Reference","previous_headings":"","what":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"Add SMART flags stratified sample survey (e.g. 
MICS, DHS, national SMART)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"","code":"national.SMART(x, strata, indices = c(\"haz\", \"whz\", \"waz\"))"},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"x Survey dataset (R data.frame) indices present strata Name column x defines strata indices Names columns x containing indices","code":""},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"data.frame structure x flagSMART column added. column coded using sums powers two","code":""},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add SMART flags to a stratified sample survey (e.g. 
MICS, DHS, national\nSMART) — national.SMART","text":"","code":"# Use the national.SMART() function to flag indices from a national # SMART survey in Nigeria (flag.ex03) svy <- flag.ex03 svyFlagged <- national.SMART(x = svy, strata = \"state\")  # Exclude records with flagging codes relevant to whz: svyFlagged <- svyFlagged[!(svyFlagged$flagSMART %in% c(2, 3, 6, 7)), ]"},{"path":"https://nutriverse.io/nipnTK/reference/nipnTK.html","id":null,"dir":"Reference","previous_headings":"","what":"NiPN data quality toolkit — nipnTK","title":"NiPN data quality toolkit — nipnTK","text":"library R functions assessing data-quality nutritional anthropometry surveys.","code":""},{"path":[]},{"path":"https://nutriverse.io/nipnTK/reference/nipnTK.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"NiPN data quality toolkit — nipnTK","text":"Maintainer: Ernest Guevarra ernest@guevarra.io (ORCID) Authors: Mark Myatt mark@brixtonhealth.com (ORCID)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":null,"dir":"Reference","previous_headings":"","what":"Mahalanobis distance to detect bivariate outliers — outliersMD","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"Mahalanobis distance detect bivariate outliers","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"","code":"outliersMD(x, y, alpha = 0.001)"},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"x Numeric vector y Numeric vector alpha Critical alpha value detect outlier (defaults 
0.001)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"logical vector (TRUE outlier p < alpha)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"","code":"# Use outliersMD() to detect outliers in an anthropometric data from # a SMART survey from the Democratic Republic of Congo (sp.ex01) svy <- sp.ex01 svy[outliersMD(svy$height,svy$weight), ] #>    age sex weight height muac oedema #> 1   54   1   20.5  111.5  180      2 #> 6   48   2   18.6   95.3  171      1 #> 16  30   1   16.9   92.5  188      2 #> 62  55   1   15.1  118.0  156      2 #> 66  56   1   15.0  115.0  148      2"},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":null,"dir":"Reference","previous_headings":"","what":"IQR to detect univariate outliers — outliersUV","title":"IQR to detect univariate outliers — outliersUV","text":"IQR detect univariate outliers","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"IQR to detect univariate outliers — outliersUV","text":"","code":"outliersUV(x, fence = 1.5)"},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"IQR to detect univariate outliers — outliersUV","text":"x Numeric vector fence IQR multiplier (defaults 1.5)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"IQR to detect univariate outliers — outliersUV","text":"logical vector (TRUE 
outlier)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"IQR to detect univariate outliers — outliersUV","text":"","code":"# Use outliersUV() to detect univariate outliers in an anthropometric # dataset from a SMART survey from Angola (rl.ex01) svy <- rl.ex01 svy[outliersUV(svy$muac), ] #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #>     age sex weight height  muac oedema #> 33   24   1    9.8   74.5 180.0      2 #> 93   12   2    6.7   67.0  96.0      1 #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 194  24   M    7.0   75.0  95.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 253  35   2    7.6   75.6  97.0      2 #> 381  24   1   10.8   82.8  12.4      2 #> 501  36   2   15.5   93.4 185.0      2 #> 594  21   2    9.8   76.5  13.2      2 #> 714  59   2   18.9   98.5 180.0      2 #> 752  48   2   15.6  102.2 999.0      2 #> 756  59   1   19.4  101.1 180.0      2 #> 873  59   1   20.6  109.4 179.0      2"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot helper function for ageChildren() function — plot.ageChildren","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"Plot helper function ageChildren() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"","code":"# S3 method for ageChildren plot(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"x Object resulting applying 
ageChildren() function ... Additional barplot() graphical parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"Bar plot comparing table observed counts vs table expected counts","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"","code":"# Plot Chi-Squared test for age of children in dp.ex02 sample dataset using # an u5mr of 1 / 10,000 / day. svy <- dp.ex02 ac <- ageChildren(svy$age, u5mr = 1) plot(ac)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":null,"dir":"Reference","previous_headings":"","what":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"plot() helper functions ageHeaping() functions","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"","code":"# S3 method for ageHeaping plot(x, main = \"\", xlab = \"Remainder\", ylab = \"Frequency\", cex = 0.75, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"x Object resulting applying ageHeaping() function main Title plot xlab x-axis label; default Remainder ylab y-axis label; default Frequency cex Character expansion (numeric); default 0.75 ... 
Additional plot() graphical parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"Barplot frequency remainders age divided specified divisor","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"","code":"# Plot age heaping test results on SMART survey data in Kabul, Afghanistan # (dp.ex02) using a divisor of 12 svy <- dp.ex02 ah12 <- ageHeaping(svy$age) plot(ah12)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":null,"dir":"Reference","previous_headings":"","what":"plot() helper function for digitPreference() function — plot.digitPreference","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"plot() helper function digitPreference() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"","code":"# S3 method for digitPreference plot(x, main = \"\", xlab = \"Final Digit\", ylab = \"Frequency\", cex = 0.75, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"x Object resulting applying digitPreference() function. main Title plot xlab x-axis label; default \"Final Digit\" ylab y-axis label; default \"Frequency\" cex Character expansion; default 0.75 ... 
Additional plot() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"Plotted output digitPreference() function comparing frequencies various final digits","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"","code":"# Plot output of digit preference test applied to anthropometric data from a # single state from a DHS survey in a West African country svy <- dp.ex01 digitPreference(svy$wt, digits = 1) #>  #> \tDigit Preference Score #>  #> data:\tsvy$wt #> Digit Preference Score (DPS) = 11.86 (Good) #>  plot(digitPreference(svy$wt, digits = 1))"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for ageChildren() function — print.ageChildren","title":"print() helper function for ageChildren() function — print.ageChildren","text":"print() helper function ageChildren() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for ageChildren() function — print.ageChildren","text":"","code":"# S3 method for ageChildren print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for ageChildren() function — print.ageChildren","text":"x Object resulting applying ageChildren() function ... 
Additional print() arguments","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for ageChildren() function — print.ageChildren","text":"Printed output ageChildren() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for ageChildren() function — print.ageChildren","text":"","code":"# Print Chi-Squared test for age of children in dp.ex02 sample dataset using # an u5mr of 1 / 10,000 / day. svy <- dp.ex02 ac <- ageChildren(svy$age, u5mr = 1) print(ac) #>  #> \tAge Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper functions for ageHeaping() functions — print.ageHeaping","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"print() helper functions ageHeaping() functions","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"","code":"# S3 method for ageHeaping print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"x Object resulting applying ageHeaping() function ... 
Additional print() arguments","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"Printed output ageHeaping() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"","code":"# Print age heaping test on SMART survey data in Kabul, Afghanistan (dp.ex02) # using a divisor of 12 svy <- dp.ex02 ah12 <- ageHeaping(svy$age) print(ah12) #>  #> \tAge-heaping Analysis #>  #> data:\tRemainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for ageRatioTest() function — print.ageRatioTest","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"print() helper function ageRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"","code":"# S3 method for ageRatioTest print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"x Object resulting applying ageRatioTest() function ... 
Additional print() arguments","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"Printed output ageRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"","code":"# Print age-ratio test results for survey dataset from Kabul, Afghanistan (dp.ex02) svy <- dp.ex02 print(ageRatioTest(svy$age, ratio = 0.85)) #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8654 #> Observed proportion aged 6 - 29 months = 0.4639 #>  #> X-squared = 0.0531, p = 0.8178 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for digitPreference() function — print.digitPreference","title":"print() helper function for digitPreference() function — print.digitPreference","text":"print() helper function digitPreference() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for digitPreference() function — print.digitPreference","text":"","code":"# S3 method for digitPreference print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for digitPreference() function — print.digitPreference","text":"x Object resulting applying digitPreference() 
function. ... Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for digitPreference() function — print.digitPreference","text":"Printed output digitPreference() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for digitPreference() function — print.digitPreference","text":"","code":"# Print output of digit preference test applied to anthropometric data from a #single state from a DHS survey in a West African country svy <- dp.ex01 print(digitPreference(svy$wt, digits = 1)) #>  #> \tDigit Preference Score #>  #> data:\tsvy$wt #> Digit Preference Score (DPS) = 11.86 (Good) #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for print.greensIndex() function — print.greensIndex","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"print() helper function print.greensIndex() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"","code":"# S3 method for greensIndex print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"x Object resulting applying greensIndex() function ... 
Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"Printed output greensIndex() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"","code":"# Apply Green's Index using anthropometric data from a SMART survey in Sudan # (flag.ex01) svy <- flag.ex01 svy$flag <- 0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6), svy$flag + 1, svy$flag) svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < -5 | svy$whz > 5), svy$flag + 2, svy$flag) svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < -6 | svy$waz > 5), svy$flag + 4, svy$flag) svy <- svy[svy$flag == 0, ] svy$stunted <- ifelse(svy$haz < -2, 1, 2) gi <- greensIndex(data = svy, psu = \"psu\", case = \"stunted\") print(gi) #>  #> \tGreen's Index of Dispersion #>  #> Green's Index (GI) of Dispersion  = -0.0013, 95% CI = (-0.0021, -0.0004) #> Maximum uniformity for this data  = -0.0035 #>                          p-value  =  0.0030 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for sexRatioTest() function — print.sexRatioTest","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"print() helper function sexRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"","code":"# S3 method for sexRatioTest print(x, 
...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"x Output resulting applying sexRatioTest() function ... Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"Printed output sexRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"","code":"# Use sexRatioTest() on household roster data from a survey in Tanzania # (as.ex01) and census data of Tanzania extracted from Wolfram|Alpha knowledge # engine (as.ex02) svy <- as.ex01 ref <- as.ex02 censusM <- sum(ref$Males) censusF <- sum(ref$Females) srt <- sexRatioTest(svy$sex, codes = c(1, 2), pop = c(censusM, censusF)) print(srt) #>  #> \tSex Ratio Test #>  #> Expected proportion male = 0.4988 #> Observed proportion male = 0.4914 #> X-squared = 1.8713, p = 0.1713 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for skewKurt() function — print.skewKurt","title":"print() helper function for skewKurt() function — print.skewKurt","text":"print() helper function skewKurt() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for skewKurt() function — print.skewKurt","text":"","code":"# S3 method for skewKurt print(x, 
...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for skewKurt() function — print.skewKurt","text":"x Object resulting applying skewKurt() function ... Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for skewKurt() function — print.skewKurt","text":"Printed output skewKurt() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for skewKurt() function — print.skewKurt","text":"","code":"# Use skewKurt() on an anthropometric data from a SMART survey in # Kabul, Afghanistan (dist.ex01) svy <- dist.ex01 sk <- skewKurt(svy$muac) print(sk) #>  #> \tSkewness and kurtosis #>  #> Skewness = +0.0525\tSE = 0.0828\tz = 0.6348\tp = 0.5256 #> Kurtosis = -0.2412\tSE = 0.1653\tz = 1.4586\tp = 0.1447 #>"},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":null,"dir":"Reference","previous_headings":"","what":"Pyramid plot function for creating population pyramids. — pyramid.plot","title":"Pyramid plot function for creating population pyramids. — pyramid.plot","text":"Pyramid plot function creating population pyramids.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pyramid plot function for creating population pyramids. 
— pyramid.plot","text":"","code":"pyramid.plot(   x,   g,   main = paste(\"Pyramid plot of\", deparse(substitute(x)), \"by\", deparse(substitute(g))),   xlab = paste(deparse(substitute(g)), \"(\", levels(as.factor(g))[1], \"/\",     levels(as.factor(g))[2], \")\"),   ylab = deparse(substitute(x)),   col = \"white\",   ... )"},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Pyramid plot function for creating population pyramids. — pyramid.plot","text":"x Vector ages (usually grouped) g Vector groups (usually sex) main Plot title xlab x-axis label ylab y-axis label col Colours bars. Either single colour (default col = \"white\") bars, two colours (e.g. col = c(\"lightblue\", \"pink\")) left hand side bars right hand side bars respectively, many colours allocated checkerboard basis bar ... graphical parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pyramid plot function for creating population pyramids. — pyramid.plot","text":"table x g (invisible)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Pyramid plot function for creating population pyramids. 
— pyramid.plot","text":"","code":"# Use pyramid.plot() on anthropometric data from a SMART survey in # Kabul, Afghanistan (dp.ex02) svy <- dp.ex02 pyramid.plot(svy$age, svy$sex)"},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":null,"dir":"Reference","previous_headings":"","what":"Normal quantile-quantile plot — qqNormalPlot","title":"Normal quantile-quantile plot — qqNormalPlot","text":"Normal quantile-quantile plot","code":""},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Normal quantile-quantile plot — qqNormalPlot","text":"","code":"qqNormalPlot(x)"},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Normal quantile-quantile plot — qqNormalPlot","text":"x numeric vector","code":""},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Normal quantile-quantile plot — qqNormalPlot","text":"","code":"# qqNormalPlot() with data from a SMART survey in Kabul, Afghanistan # (dist.ex01) svy <- dist.ex01 qqNormalPlot(svy$muac)  qqNormalPlot(svy$haz)  qqNormalPlot(svy$waz)  qqNormalPlot(svy$whz)"},{"path":"https://nutriverse.io/nipnTK/reference/rl.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for checking ranges and legal values — rl.ex01","title":"Example dataset for checking ranges and legal values — rl.ex01","text":"Anthropometric data SMART survey Angola.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/rl.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for checking ranges and legal values — 
rl.ex01","text":"","code":"rl.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/rl.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for checking ranges and legal values — rl.ex01","text":"data frame 906 observations 6 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"Sex Ratio Test — sexRatioTest","title":"Sex Ratio Test — sexRatioTest","text":"Sex Ratio Test","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sex Ratio Test — sexRatioTest","text":"","code":"sexRatioTest(sex, codes = c(1, 2), pop = c(1, 1))"},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sex Ratio Test — sexRatioTest","text":"sex Numeric vector (sex) codes Codes used identify males females (order) pop Relative populations males females (order)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sex Ratio Test — sexRatioTest","text":"list class \"sexRatioTest\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sex Ratio Test — sexRatioTest","text":"","code":"# Use sexRatioTest() on household roster data from a survey in Tanzania # (as.ex01) and census data of Tanzania extracted from Wolfram|Alpha knowledge # engine (as.ex02) svy <- as.ex01 ref <- as.ex02 censusM <- sum(ref$Males) censusF <- sum(ref$Females) sexRatioTest(svy$sex, codes = c(1, 2), pop = c(censusM, censusF)) #>  #> \tSex Ratio Test #>  #> Expected proportion male = 0.4988 #> Observed proportion male = 0.4914 #> X-squared = 1.8713, p = 0.1713 
#>"},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":null,"dir":"Reference","previous_headings":"","what":"Skew and kurtosis — skewKurt","title":"Skew and kurtosis — skewKurt","text":"Skew kurtosis","code":""},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Skew and kurtosis — skewKurt","text":"","code":"skewKurt(x)"},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Skew and kurtosis — skewKurt","text":"x Numeric vector","code":""},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Skew and kurtosis — skewKurt","text":"list class \"skewKurt\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Skew and kurtosis — skewKurt","text":"","code":"# Use skewKurt() on an anthropometric data from a SMART survey in # Kabul, Afghanistan (dist.ex01) svy <- dist.ex01 skewKurt(svy$muac) #>  #> \tSkewness and kurtosis #>  #> Skewness = +0.0525\tSE = 0.0828\tz = 0.6348\tp = 0.5256 #> Kurtosis = -0.2412\tSE = 0.1653\tz = 1.4586\tp = 0.1447 #>"},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for using scatterplots to identify outliers — sp.ex01","title":"Example dataset for using scatterplots to identify outliers — sp.ex01","text":"Anthropometric data SMART survey Democratic Republic Congo.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for using scatterplots to identify outliers — 
sp.ex01","text":"","code":"sp.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for using scatterplots to identify outliers — sp.ex01","text":"data frame 895 observations 6 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for using scatterplots to identify outliers — sp.ex02","title":"Example dataset for using scatterplots to identify outliers — sp.ex02","text":"Anthropometric data survey school-age (.e., 5 15 years) children Pakistan","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for using scatterplots to identify outliers — sp.ex02","text":"","code":"sp.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for using scatterplots to identify outliers — sp.ex02","text":"data frame 849 observations 9 variables","code":""},{"path":"https://nutriverse.io/nipnTK/news/index.html","id":"nipntk-0119000","dir":"Changelog","previous_headings":"","what":"nipnTK 0.1.1.9000","title":"nipnTK 0.1.1.9000","text":"Second release nipnTK. GitHub-development release. 
release:","code":""},{"path":"https://nutriverse.io/nipnTK/news/index.html","id":"general-updates-0-1-1-9000","dir":"Changelog","previous_headings":"","what":"General updates","title":"nipnTK 0.1.1.9000","text":"remove appveyor.yml Appveyor ci/cd workflow update GitHub Actions workflow latest 5 system standard check update GitHub Actions workflow coverage testing change default git branch name master main add CITATION update CONTRIBUTOR guidelines upgrade website bootstrap 5","code":""},{"path":"https://nutriverse.io/nipnTK/news/index.html","id":"nipntk-010","dir":"Changelog","previous_headings":"","what":"nipnTK 0.1.0","title":"nipnTK 0.1.0","text":"CRAN release: 2020-11-30 first CRAN release nipnTK.","code":""}]
+[{"path":[]},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. 
Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement ernest@guevarra.io. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. 
public apology may requested.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. 
Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://nutriverse.io/nipnTK/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":[]},{"path":"https://nutriverse.io/nipnTK/CONTRIBUTING.html","id":"bugs","dir":"","previous_headings":"","what":"Bugs","title":"Contributing","text":"Submit issue issues page","code":""},{"path":"https://nutriverse.io/nipnTK/CONTRIBUTING.html","id":"code-contributions","dir":"","previous_headings":"","what":"Code contributions","title":"Contributing","text":"Fork repository Github account Clone version account machine account Make sure track progress upstream .e., version nipnTK nutriverse/nipnTK, making changes make sure pull changes upstream either git fetch upstream merge later git pull upstream fetch merge one step Make changes new feature branch Please write test tests changes affect code just documentation Push changes account Submit pull request nutriverse/nipnTK","code":"git clone https://github.com/<yourgithubusername>/nipnTK.git git remote add upstream https://github.com/nutriverse/nipnTK.git"},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"GNU General Public License","title":"GNU General Public License","text":"Version 3, 29 June 2007Copyright © 2007 Free Software Foundation, Inc. 
<http://fsf.org/> Everyone permitted copy distribute verbatim copies license document, changing allowed.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"preamble","dir":"","previous_headings":"","what":"Preamble","title":"GNU General Public License","text":"GNU General Public License free, copyleft license software kinds works. licenses software practical works designed take away freedom share change works. contrast, GNU General Public License intended guarantee freedom share change versions program–make sure remains free software users. , Free Software Foundation, use GNU General Public License software; applies also work released way authors. can apply programs, . speak free software, referring freedom, price. General Public Licenses designed make sure freedom distribute copies free software (charge wish), receive source code can get want , can change software use pieces new free programs, know can things. protect rights, need prevent others denying rights asking surrender rights. Therefore, certain responsibilities distribute copies software, modify : responsibilities respect freedom others. example, distribute copies program, whether gratis fee, must pass recipients freedoms received. must make sure , , receive can get source code. must show terms know rights. Developers use GNU GPL protect rights two steps: (1) assert copyright software, (2) offer License giving legal permission copy, distribute /modify . developers’ authors’ protection, GPL clearly explains warranty free software. users’ authors’ sake, GPL requires modified versions marked changed, problems attributed erroneously authors previous versions. devices designed deny users access install run modified versions software inside , although manufacturer can . fundamentally incompatible aim protecting users’ freedom change software. systematic pattern abuse occurs area products individuals use, precisely unacceptable. Therefore, designed version GPL prohibit practice products. 
problems arise substantially domains, stand ready extend provision domains future versions GPL, needed protect freedom users. Finally, every program threatened constantly software patents. States allow patents restrict development use software general-purpose computers, , wish avoid special danger patents applied free program make effectively proprietary. prevent , GPL assures patents used render program non-free. precise terms conditions copying, distribution modification follow.","code":""},{"path":[]},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_0-definitions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"0. Definitions","title":"GNU General Public License","text":"“License” refers version 3 GNU General Public License. “Copyright” also means copyright-like laws apply kinds works, semiconductor masks. “Program” refers copyrightable work licensed License. licensee addressed “”. “Licensees” “recipients” may individuals organizations. “modify” work means copy adapt part work fashion requiring copyright permission, making exact copy. resulting work called “modified version” earlier work work “based ” earlier work. “covered work” means either unmodified Program work based Program. “propagate” work means anything , without permission, make directly secondarily liable infringement applicable copyright law, except executing computer modifying private copy. Propagation includes copying, distribution (without modification), making available public, countries activities well. “convey” work means kind propagation enables parties make receive copies. Mere interaction user computer network, transfer copy, conveying. interactive user interface displays “Appropriate Legal Notices” extent includes convenient prominently visible feature (1) displays appropriate copyright notice, (2) tells user warranty work (except extent warranties provided), licensees may convey work License, view copy License. 
interface presents list user commands options, menu, prominent item list meets criterion.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_1-source-code","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"1. Source Code","title":"GNU General Public License","text":"“source code” work means preferred form work making modifications . “Object code” means non-source form work. “Standard Interface” means interface either official standard defined recognized standards body, , case interfaces specified particular programming language, one widely used among developers working language. “System Libraries” executable work include anything, work whole, () included normal form packaging Major Component, part Major Component, (b) serves enable use work Major Component, implement Standard Interface implementation available public source code form. “Major Component”, context, means major essential component (kernel, window system, ) specific operating system () executable work runs, compiler used produce work, object code interpreter used run . “Corresponding Source” work object code form means source code needed generate, install, (executable work) run object code modify work, including scripts control activities. However, include work’s System Libraries, general-purpose tools generally available free programs used unmodified performing activities part work. example, Corresponding Source includes interface definition files associated source files work, source code shared libraries dynamically linked subprograms work specifically designed require, intimate data communication control flow subprograms parts work. Corresponding Source need include anything users can regenerate automatically parts Corresponding Source. Corresponding Source work source code form work.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_2-basic-permissions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"2. 
Basic Permissions","title":"GNU General Public License","text":"rights granted License granted term copyright Program, irrevocable provided stated conditions met. License explicitly affirms unlimited permission run unmodified Program. output running covered work covered License output, given content, constitutes covered work. License acknowledges rights fair use equivalent, provided copyright law. may make, run propagate covered works convey, without conditions long license otherwise remains force. may convey covered works others sole purpose make modifications exclusively , provide facilities running works, provided comply terms License conveying material control copyright. thus making running covered works must exclusively behalf, direction control, terms prohibit making copies copyrighted material outside relationship . Conveying circumstances permitted solely conditions stated . Sublicensing allowed; section 10 makes unnecessary.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_3-protecting-users-legal-rights-from-anti-circumvention-law","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"3. Protecting Users’ Legal Rights From Anti-Circumvention Law","title":"GNU General Public License","text":"covered work shall deemed part effective technological measure applicable law fulfilling obligations article 11 WIPO copyright treaty adopted 20 December 1996, similar laws prohibiting restricting circumvention measures. convey covered work, waive legal power forbid circumvention technological measures extent circumvention effected exercising rights License respect covered work, disclaim intention limit operation modification work means enforcing, work’s users, third parties’ legal rights forbid circumvention technological measures.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_4-conveying-verbatim-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"4. 
Conveying Verbatim Copies","title":"GNU General Public License","text":"may convey verbatim copies Program’s source code receive , medium, provided conspicuously appropriately publish copy appropriate copyright notice; keep intact notices stating License non-permissive terms added accord section 7 apply code; keep intact notices absence warranty; give recipients copy License along Program. may charge price price copy convey, may offer support warranty protection fee.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_5-conveying-modified-source-versions","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"5. Conveying Modified Source Versions","title":"GNU General Public License","text":"may convey work based Program, modifications produce Program, form source code terms section 4, provided also meet conditions: ) work must carry prominent notices stating modified , giving relevant date. b) work must carry prominent notices stating released License conditions added section 7. requirement modifies requirement section 4 “keep intact notices”. c) must license entire work, whole, License anyone comes possession copy. License therefore apply, along applicable section 7 additional terms, whole work, parts, regardless packaged. License gives permission license work way, invalidate permission separately received . d) work interactive user interfaces, must display Appropriate Legal Notices; however, Program interactive interfaces display Appropriate Legal Notices, work need make . compilation covered work separate independent works, nature extensions covered work, combined form larger program, volume storage distribution medium, called “aggregate” compilation resulting copyright used limit access legal rights compilation’s users beyond individual works permit. 
Inclusion covered work aggregate cause License apply parts aggregate.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_6-conveying-non-source-forms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"6. Conveying Non-Source Forms","title":"GNU General Public License","text":"may convey covered work object code form terms sections 4 5, provided also convey machine-readable Corresponding Source terms License, one ways: ) Convey object code , embodied , physical product (including physical distribution medium), accompanied Corresponding Source fixed durable physical medium customarily used software interchange. b) Convey object code , embodied , physical product (including physical distribution medium), accompanied written offer, valid least three years valid long offer spare parts customer support product model, give anyone possesses object code either (1) copy Corresponding Source software product covered License, durable physical medium customarily used software interchange, price reasonable cost physically performing conveying source, (2) access copy Corresponding Source network server charge. c) Convey individual copies object code copy written offer provide Corresponding Source. alternative allowed occasionally noncommercially, received object code offer, accord subsection 6b. d) Convey object code offering access designated place (gratis charge), offer equivalent access Corresponding Source way place charge. need require recipients copy Corresponding Source along object code. place copy object code network server, Corresponding Source may different server (operated third party) supports equivalent copying facilities, provided maintain clear directions next object code saying find Corresponding Source. Regardless server hosts Corresponding Source, remain obligated ensure available long needed satisfy requirements. 
e) Convey object code using peer--peer transmission, provided inform peers object code Corresponding Source work offered general public charge subsection 6d. separable portion object code, whose source code excluded Corresponding Source System Library, need included conveying object code work. “User Product” either (1) “consumer product”, means tangible personal property normally used personal, family, household purposes, (2) anything designed sold incorporation dwelling. determining whether product consumer product, doubtful cases shall resolved favor coverage. particular product received particular user, “normally used” refers typical common use class product, regardless status particular user way particular user actually uses, expects expected use, product. product consumer product regardless whether product substantial commercial, industrial non-consumer uses, unless uses represent significant mode use product. “Installation Information” User Product means methods, procedures, authorization keys, information required install execute modified versions covered work User Product modified version Corresponding Source. information must suffice ensure continued functioning modified object code case prevented interfered solely modification made. convey object code work section , , specifically use , User Product, conveying occurs part transaction right possession use User Product transferred recipient perpetuity fixed term (regardless transaction characterized), Corresponding Source conveyed section must accompanied Installation Information. requirement apply neither third party retains ability install modified object code User Product (example, work installed ROM). requirement provide Installation Information include requirement continue provide support service, warranty, updates work modified installed recipient, User Product modified installed. 
Access network may denied modification materially adversely affects operation network violates rules protocols communication across network. Corresponding Source conveyed, Installation Information provided, accord section must format publicly documented (implementation available public source code form), must require special password key unpacking, reading copying.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_7-additional-terms","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"7. Additional Terms","title":"GNU General Public License","text":"“Additional permissions” terms supplement terms License making exceptions one conditions. Additional permissions applicable entire Program shall treated though included License, extent valid applicable law. additional permissions apply part Program, part may used separately permissions, entire Program remains governed License without regard additional permissions. convey copy covered work, may option remove additional permissions copy, part . (Additional permissions may written require removal certain cases modify work.) may place additional permissions material, added covered work, can give appropriate copyright permission. 
Notwithstanding provision License, material add covered work, may (authorized copyright holders material) supplement terms License terms: ) Disclaiming warranty limiting liability differently terms sections 15 16 License; b) Requiring preservation specified reasonable legal notices author attributions material Appropriate Legal Notices displayed works containing ; c) Prohibiting misrepresentation origin material, requiring modified versions material marked reasonable ways different original version; d) Limiting use publicity purposes names licensors authors material; e) Declining grant rights trademark law use trade names, trademarks, service marks; f) Requiring indemnification licensors authors material anyone conveys material (modified versions ) contractual assumptions liability recipient, liability contractual assumptions directly impose licensors authors. non-permissive additional terms considered “restrictions” within meaning section 10. Program received , part , contains notice stating governed License along term restriction, may remove term. license document contains restriction permits relicensing conveying License, may add covered work material governed terms license document, provided restriction survive relicensing conveying. add terms covered work accord section, must place, relevant source files, statement additional terms apply files, notice indicating find applicable terms. Additional terms, permissive non-permissive, may stated form separately written license, stated exceptions; requirements apply either way.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_8-termination","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"8. Termination","title":"GNU General Public License","text":"may propagate modify covered work except expressly provided License. attempt otherwise propagate modify void, automatically terminate rights License (including patent licenses granted third paragraph section 11). 
However, cease violation License, license particular copyright holder reinstated () provisionally, unless copyright holder explicitly finally terminates license, (b) permanently, copyright holder fails notify violation reasonable means prior 60 days cessation. Moreover, license particular copyright holder reinstated permanently copyright holder notifies violation reasonable means, first time received notice violation License (work) copyright holder, cure violation prior 30 days receipt notice. Termination rights section terminate licenses parties received copies rights License. rights terminated permanently reinstated, qualify receive new licenses material section 10.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_9-acceptance-not-required-for-having-copies","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"9. Acceptance Not Required for Having Copies","title":"GNU General Public License","text":"required accept License order receive run copy Program. Ancillary propagation covered work occurring solely consequence using peer--peer transmission receive copy likewise require acceptance. However, nothing License grants permission propagate modify covered work. actions infringe copyright accept License. Therefore, modifying propagating covered work, indicate acceptance License .","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_10-automatic-licensing-of-downstream-recipients","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"10. Automatic Licensing of Downstream Recipients","title":"GNU General Public License","text":"time convey covered work, recipient automatically receives license original licensors, run, modify propagate work, subject License. responsible enforcing compliance third parties License. “entity transaction” transaction transferring control organization, substantially assets one, subdividing organization, merging organizations. 
propagation covered work results entity transaction, party transaction receives copy work also receives whatever licenses work party’s predecessor interest give previous paragraph, plus right possession Corresponding Source work predecessor interest, predecessor can get reasonable efforts. may impose restrictions exercise rights granted affirmed License. example, may impose license fee, royalty, charge exercise rights granted License, may initiate litigation (including cross-claim counterclaim lawsuit) alleging patent claim infringed making, using, selling, offering sale, importing Program portion .","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_11-patents","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"11. Patents","title":"GNU General Public License","text":"“contributor” copyright holder authorizes use License Program work Program based. work thus licensed called contributor’s “contributor version”. contributor’s “essential patent claims” patent claims owned controlled contributor, whether already acquired hereafter acquired, infringed manner, permitted License, making, using, selling contributor version, include claims infringed consequence modification contributor version. purposes definition, “control” includes right grant patent sublicenses manner consistent requirements License. contributor grants non-exclusive, worldwide, royalty-free patent license contributor’s essential patent claims, make, use, sell, offer sale, import otherwise run, modify propagate contents contributor version. following three paragraphs, “patent license” express agreement commitment, however denominated, enforce patent (express permission practice patent covenant sue patent infringement). “grant” patent license party means make agreement commitment enforce patent party. 
convey covered work, knowingly relying patent license, Corresponding Source work available anyone copy, free charge terms License, publicly available network server readily accessible means, must either (1) cause Corresponding Source available, (2) arrange deprive benefit patent license particular work, (3) arrange, manner consistent requirements License, extend patent license downstream recipients. “Knowingly relying” means actual knowledge , patent license, conveying covered work country, recipient’s use covered work country, infringe one identifiable patents country reason believe valid. , pursuant connection single transaction arrangement, convey, propagate procuring conveyance , covered work, grant patent license parties receiving covered work authorizing use, propagate, modify convey specific copy covered work, patent license grant automatically extended recipients covered work works based . patent license “discriminatory” include within scope coverage, prohibits exercise , conditioned non-exercise one rights specifically granted License. may convey covered work party arrangement third party business distributing software, make payment third party based extent activity conveying work, third party grants, parties receive covered work , discriminatory patent license () connection copies covered work conveyed (copies made copies), (b) primarily connection specific products compilations contain covered work, unless entered arrangement, patent license granted, prior 28 March 2007. Nothing License shall construed excluding limiting implied license defenses infringement may otherwise available applicable patent law.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_12-no-surrender-of-others-freedom","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"12. 
No Surrender of Others’ Freedom","title":"GNU General Public License","text":"conditions imposed (whether court order, agreement otherwise) contradict conditions License, excuse conditions License. convey covered work satisfy simultaneously obligations License pertinent obligations, consequence may convey . example, agree terms obligate collect royalty conveying convey Program, way satisfy terms License refrain entirely conveying Program.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_13-use-with-the-gnu-affero-general-public-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"13. Use with the GNU Affero General Public License","title":"GNU General Public License","text":"Notwithstanding provision License, permission link combine covered work work licensed version 3 GNU Affero General Public License single combined work, convey resulting work. terms License continue apply part covered work, special requirements GNU Affero General Public License, section 13, concerning interaction network apply combination .","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_14-revised-versions-of-this-license","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"14. Revised Versions of this License","title":"GNU General Public License","text":"Free Software Foundation may publish revised /new versions GNU General Public License time time. new versions similar spirit present version, may differ detail address new problems concerns. version given distinguishing version number. Program specifies certain numbered version GNU General Public License “later version” applies , option following terms conditions either numbered version later version published Free Software Foundation. Program specify version number GNU General Public License, may choose version ever published Free Software Foundation. 
Program specifies proxy can decide future versions GNU General Public License can used, proxy’s public statement acceptance version permanently authorizes choose version Program. Later license versions may give additional different permissions. However, additional obligations imposed author copyright holder result choosing follow later version.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_15-disclaimer-of-warranty","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"15. Disclaimer of Warranty","title":"GNU General Public License","text":"WARRANTY PROGRAM, EXTENT PERMITTED APPLICABLE LAW. EXCEPT OTHERWISE STATED WRITING COPYRIGHT HOLDERS /PARTIES PROVIDE PROGRAM “” WITHOUT WARRANTY KIND, EITHER EXPRESSED IMPLIED, INCLUDING, LIMITED , IMPLIED WARRANTIES MERCHANTABILITY FITNESS PARTICULAR PURPOSE. ENTIRE RISK QUALITY PERFORMANCE PROGRAM . PROGRAM PROVE DEFECTIVE, ASSUME COST NECESSARY SERVICING, REPAIR CORRECTION.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_16-limitation-of-liability","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"16. Limitation of Liability","title":"GNU General Public License","text":"EVENT UNLESS REQUIRED APPLICABLE LAW AGREED WRITING COPYRIGHT HOLDER, PARTY MODIFIES /CONVEYS PROGRAM PERMITTED , LIABLE DAMAGES, INCLUDING GENERAL, SPECIAL, INCIDENTAL CONSEQUENTIAL DAMAGES ARISING USE INABILITY USE PROGRAM (INCLUDING LIMITED LOSS DATA DATA RENDERED INACCURATE LOSSES SUSTAINED THIRD PARTIES FAILURE PROGRAM OPERATE PROGRAMS), EVEN HOLDER PARTY ADVISED POSSIBILITY DAMAGES.","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"id_17-interpretation-of-sections-15-and-16","dir":"","previous_headings":"TERMS AND CONDITIONS","what":"17. 
Interpretation of Sections 15 and 16","title":"GNU General Public License","text":"disclaimer warranty limitation liability provided given local legal effect according terms, reviewing courts shall apply local law closely approximates absolute waiver civil liability connection Program, unless warranty assumption liability accompanies copy Program return fee. END TERMS CONDITIONS","code":""},{"path":"https://nutriverse.io/nipnTK/LICENSE.html","id":"how-to-apply-these-terms-to-your-new-programs","dir":"","previous_headings":"","what":"How to Apply These Terms to Your New Programs","title":"GNU General Public License","text":"develop new program, want greatest possible use public, best way achieve make free software everyone can redistribute change terms. , attach following notices program. safest attach start source file effectively state exclusion warranty; file least “copyright” line pointer full notice found. Also add information contact electronic paper mail. program terminal interaction, make output short notice like starts interactive mode: hypothetical commands show w show c show appropriate parts General Public License. course, program’s commands might different; GUI interface, use “box”. also get employer (work programmer) school, , sign “copyright disclaimer” program, necessary. information , apply follow GNU GPL, see <http://www.gnu.org/licenses/>. GNU General Public License permit incorporating program proprietary programs. program subroutine library, may consider useful permit linking proprietary applications library. want , use GNU Lesser General Public License instead License. 
first, please read <http://www.gnu.org/philosophy/--lgpl.html>.","code":"<one line to give the program's name and a brief idea of what it does.> Copyright (C) 2020 Mark Myatt and Ernest Guevarra  This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.  You should have received a copy of the GNU General Public License along with this program.  If not, see <http://www.gnu.org/licenses/>. nipnTK Copyright (C) 2020 Mark Myatt and Ernest Guevarra This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free software, and you are welcome to redistribute it under certain conditions; type 'show c' for details."},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"assessing-the-distribution-of-anthropometric-variables-indices-and-indicators","dir":"Articles","previous_headings":"","what":"Assessing the distribution of anthropometric variables, indices, and indicators","title":"Distributions of variables and indices","text":"section examine distribution anthropometric variables (e.g. weight, height, MUAC), anthropometric indices (e.g. WHZ, HAZ, WHZ), anthropometric indicators (e.g. wasted, stunted, underweight). Topics distribution age, age sex, age-heaping, digit preference covered sections toolkit. 
retrieve survey dataset: file dist.ex01.csv comma-separated-value (CSV) file containing anthropometric data SMART survey Kabul, Afghanistan.","code":"svy <- read.table(\"dist.ex01.csv\", header = TRUE, sep = \",\") head(svy)"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"numerical-summaries","dir":"Articles","previous_headings":"","what":"Numerical summaries","title":"Distributions of variables and indices","text":"summary() function R provides six-figure summary (.e. minimum, first quartile, median, means, third quartile, maximum) numeric variable. example: returns: six-figure summary report standard deviation. sd() function R calculates standard deviation. example: returns: sd() function may return NA. happen missing values specified variable. happens can instruct function ignore missing values: returns value: Using na.rm parameter way (.e. specifying na.rm = TRUE) works many descriptive functions R (see table ). descriptive functions R","code":"summary(svy$weight) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    4.90    9.00   11.00   11.13   13.10   20.70 sd(svy$weight) #> [1] 2.802065 sd(svy$weight, na.rm = TRUE) #> [1] 2.802065"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"graphical-and-numerical-summaries","dir":"Articles","previous_headings":"","what":"Graphical and numerical summaries","title":"Distributions of variables and indices","text":"Numerical summaries useful checking data within expected range. Graphical methods often informative numerical summaries. key graphical method examining distribution variable histogram. example: displays histogram weight variable example dataset (see figure ).  need careful examining distribution measurements, may vary sex. example: display heights males females. , display two separate distributions single distribution.  
case sensible look data males data females using separate histograms:  using box-plot:  Numerical summaries can also used: returns:","code":"hist(svy$weight) hist(svy$height) hist(svy$height[svy$sex == 1])  hist(svy$height[svy$sex == 2]) boxplot(svy$height ~ svy$sex, names = c(\"M\", \"F\"),          xlab = \"Sex\", ylab = \"Height (cm)\", main = \"Height by sex\") by(svy$height, svy$sex, summary) #> svy$sex: 1 #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>   56.20   75.00   81.95   82.49   90.00  110.50  #> ------------------------------------------------------------  #> svy$sex: 2 #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>   58.00   73.25   80.30   81.30   88.95  109.50"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"normal-distributions","dir":"Articles","previous_headings":"","what":"Normal distributions","title":"Distributions of variables and indices","text":"anthropometric variables indices usually expect symmetrical (nearly symmetrical) “bell-shaped” distribution. variables indices interest usually: plots shown .  Histograms showing distribution anthropometric indices example dataset number size “intervals” (breaks) used plotting histogram calculated produce useful plot. intervals used based range data. can specify different set breaks hist() function use. example:  calculates intervals using standard deviation sample size. :  calculates intervals using inter-quartile range. :  use 40 intervals. :  uses intervals 0.5 z-scores wide full range haz. plots show nearly symmetrical “bell-shaped” distributions. ideal symmetrical “bell-shaped” distribution normal distribution. number ways assessing whether variable normally distributed. first way assessing whether variable normally distributed simple “-eye” assessment already done using histograms. NiPN data quality toolkit provides R language function called histNormal() can help “-eye” assessments superimposing normal curve histogram variable interest: plots shown . 
variables appear approximately normally distributed.  Histograms anthropometric indices normal curves superimposed Changing breaks parameter may make histogram easier “read”. example:  Another graphical method assessing whether variable normally distributed normal quantile-quantile plot. easy produce using R. NiPN data quality toolkit provided helper function called qqNormalPlot() produces slightly enhanced normal quantile-quantile plot:  plot shown (annotations). example tails distribution contain cases expected perfectly normally distributed variable. Annotated normal quantile-quantile plot whz variable example dataset examine relevant variables: plots shown . evidence small deviations normality muac, haz, whz.  Normal quantile-quantile plots anthropometric indices example dataset final way assessing normality use formal statistical significance test. preferred test Shapiro-Wilk test normality: tests indicate muac, haz, whz significantly non-normal. Examination histograms normal quantile-quantile plots show deviation normality indices particular marked. indices symmetrical, nearly symmetrical, “bell-shaped” distributions. need careful using significance tests Shapiro-Wilk test normality results can strongly influenced sample size. Small sample sizes can lead tests missing large effects large sample sizes can lead tests identifying small effects highly significant. analysis found highly significant small deviations normality probably detected significance test smaller sample size used. can simulate considerably smaller sample size taking, example, every fourth muac value: Inspecting smaller sample graphically:  yields results similar found complete sample used, formal test: longer significant p < 0.05. distribution appears normal (.e. symmetrical, nearly symmetrical, “bell-shaped” distribution) usually safe assume normality use statistical procedures assume normality. Formal tests normality can misleading sample sizes hundred cases used. 
Graphical methods useful sample sizes small. Formal test useful sample sizes large. sample sizes anthropometry surveys large enough cause formal tests normality identify small deviations normality highly significant.","code":"hist(svy$muac)  hist(svy$haz)  hist(svy$waz)  hist(svy$whz) hist(svy$haz, breaks = \"scott\") hist(svy$haz, breaks = \"FD\") hist(svy$haz, breaks = 40) hist(svy$haz,       breaks = seq(from = floor(min(svy$haz)), to = ceiling(max(svy$haz)), by = 0.5)) histNormal(svy$muac) histNormal(svy$haz) histNormal(svy$waz) histNormal(svy$whz) histNormal(svy$haz, breaks = 15) qqNormalPlot(svy$whz) qqNormalPlot(svy$muac)  qqNormalPlot(svy$haz)  qqNormalPlot(svy$waz)  qqNormalPlot(svy$whz) shapiro.test(svy$muac)  #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$muac #> W = 0.99496, p-value = 0.005495 shapiro.test(svy$haz)  #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$haz #> W = 0.99348, p-value = 0.0007455 shapiro.test(svy$waz)  #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$waz #> W = 0.99827, p-value = 0.5358 shapiro.test(svy$whz) #>  #>  Shapiro-Wilk normality test #>  #> data:  svy$whz #> W = 0.99078, p-value = 2.777e-05 length(svy$muac) #> [1] 873 oneQuarter <- svy$muac[seq(from = 1, to = length(svy$muac), by = 4)]  length(oneQuarter) #> [1] 219 histNormal(oneQuarter)  qqNormalPlot(oneQuarter) shapiro.test(oneQuarter) #>  #>  Shapiro-Wilk normality test #>  #> data:  oneQuarter #> W = 0.98836, p-value = 0.0724"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"skew-and-kurtosis","dir":"Articles","previous_headings":"","what":"Skew and kurtosis","title":"Distributions of variables and indices","text":"Skew measure asymmetry distribution mean. Skew can zero, positive, negative. Zero skew found distribution perfectly symmetrical. Positive skew found long right tail distribution mass distribution concentrated left. Negative skew found long left tail distribution mass distribution concentrated right. 
can usually see skew histograms. can also calculate skewness statistic test significantly different zero. Kurtosis measure much distribution concentrated mean. Kurtosis can zero, positive, negative. Zero kurtosis found variable normally distributed. Positive kurtosis found mass distribution concentrated mean values far mean. Negative kurtosis found mass distribution concentrated tails distribution. can usually see kurtosis histograms. can also calculate kurtosis statistic test significantly different zero. NiPN data quality toolkit provides R language function called skewKurt() calculates skewness kurtosis statistics tests whether differ significantly zero. apply skewKurt() function muac variable example dataset: returns: positive skew negative kurtosis. Neither significantly different zero. Applying skewKurt() function haz variable example dataset: returns: positive skew positive kurtosis. skew significantly different zero. skew can seen histogram:  Applying skewKurt() function waz variable example dataset: returns: negative skew positive kurtosis. Neither significantly different zero. Applying skewKurt() function whz variable example dataset: returns: positive skew positive kurtosis. kurtosis significantly different zero. kurtosis can seen histogram:  tall central columns exceed expected values shown overlaid normal distribution. Skew kurtosis used SMART plausibility checks. Table shows skew kurtosis statistics applied SMART. range absolute values skewness kurtosis statistics applied SMART (2015) whz variable example dataset considered “problematic” according scheme kurtosis 0.6. Care exercised using statistical significance tests classify data “problematic”. use thresholds ranges skew kurtosis statistics usually better approach relying tests statistical significance. Significance tests can strongly affected sample size. Small sample sizes can lead tests missing large effects large sample sizes can lead tests identifying small effects highly significant. 
distribution appears normal (.e. symmetrical, nearly symmetrical, “bell-shaped” distribution) usually safe assume normality use statistical procedures assume normality. important remember normal distribution mathematical abstraction. nothing compelling real world conform normal distribution. normal distribution become reified: Everyone sure [normal distribution] … experimentalists believe mathematical theorem, mathematicians experimentally determined fact.  — Henri Poincaré (1912), Calcul des Probabilités data see may representative reality even fails tests normality. Tests normality useful selecting statistical methods rely normality. less useful determining data quality. data follows symmetrical, nearly symmetrical, “bell-shaped” distribution usually safe use.","code":"skewKurt(svy$muac) #>  #>  Skewness and kurtosis #>  #> Skewness = +0.0525   SE = 0.0828 z = 0.6348  p = 0.5256 #> Kurtosis = -0.2412   SE = 0.1653 z = 1.4586  p = 0.1447 skewKurt(svy$haz) #>  #>  Skewness and kurtosis #>  #> Skewness = +0.3074   SE = 0.0828 z = 3.7149  p = 0.0002 #> Kurtosis = +0.2074   SE = 0.1653 z = 1.2545  p = 0.2097 histNormal(svy$haz, breaks = \"scott\") skewKurt(svy$waz) #>  #>  Skewness and kurtosis #>  #> Skewness = -0.0128   SE = 0.0828 z = 0.1541  p = 0.8775 #> Kurtosis = +0.1805   SE = 0.1653 z = 1.0919  p = 0.2749 skewKurt(svy$whz) #>  #>  Skewness and kurtosis #>  #> Skewness = +0.0823   SE = 0.0828 z = 0.9946  p = 0.3199 #> Kurtosis = +0.7528   SE = 0.1653 z = 4.5530  p = 0.0000 histNormal(svy$whz, breaks = \"scott\")"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"deviation-from-normality","dir":"Articles","previous_headings":"","what":"Deviation from normality","title":"Distributions of variables and indices","text":"anthropometric survey methods (e.g. SMART) use deviations perfect normality indicator poor data quality. sensible approach deviations normality necessarily due poor quality data; can due sampling mixed population. 
easy demonstrate simulated data. assume population consisting two groups: Group 1 : 75% population, mean = -0.48, sd = 0.87 Group 2 : 25% population, mean = -1.04, sd = 1.10 take sample size = 1000 whole population. can simulate : distributions two subgroups (g1 g2) normally distributed:   distribution entire sample (g1g2) normal:  Shapiro-Wilk test normality returns: statistically significant negative skew: , however, nothing wrong sample data distribution entire sample (g1g2) called “mixture Gaussians” (term “Gaussian” refers normal distribution context). can see mixture Gaussians :  case mixture already known. number methods revealing underlying mixture components mixture unknown. techniques covered toolkit. , however, continue example components mixture suspected. expect see small deviations normality survey datasets. often case survey samples subjects wide area covering, example, several agro-ecological zones, socio-economic groups, ethnic groups. almost always case, particularly large surveys DHS, MICS, national SMART surveys. Another reason non-normality one () survey teams systematic bias making measurement. Identifying “offending” survey team examining testing normality separately combinations data \\(n ~ – ~ 1\\) survey teams can attempted. (e.g.) three teams need separately test data : Team 1 Team 2 (Team 3 excluded) Team 1 Team 3 (Team 2 excluded) Team 2 Team 3 (Team 1 excluded) see deviation normality disappears particular team’s data excluded. , however, problem type analysis. cluster-sampled surveys, teams often sample adjacent primary sampling units (clusters). occurs “exclude one team” analysis distinguish differences due spatial heterogeneity (.e. 
patchiness) differences due team systematic measurement bias.","code":"set.seed(0) g1 <- rnorm(n = 750, mean = -0.48, sd = 0.87)  g2 <- rnorm(n = 250, mean = -1.04, sd = 1.11)  g1g2 <- c(g1, g2) histNormal(g1)  qqNormalPlot(g1) shapiro.test(g1)  skewKurt(g1) #>  #>  Shapiro-Wilk normality test #>  #> data:  g1 #> W = 0.99725, p-value = 0.2411 #>  #>  Skewness and kurtosis #>  #> Skewness = +0.1149   SE = 0.0893 z = 1.2867  p = 0.1982 #> Kurtosis = -0.1869   SE = 0.1783 z = 1.0483  p = 0.2945 histNormal(g2)  qqNormalPlot(g2)  shapiro.test(g2)  skewKurt(g2) #>  #>  Shapiro-Wilk normality test #>  #> data:  g2 #> W = 0.9947, p-value = 0.5363 #>  #>  Skewness and kurtosis #>  #> Skewness = +0.0317   SE = 0.1540 z = 0.2058  p = 0.8369 #> Kurtosis = -0.1282   SE = 0.3068 z = 0.4178  p = 0.6761 histNormal(g1g2)  qqNormalPlot(g1g2)  shapiro.test(g1g2)  skewKurt(g1g2) #>  #>  Shapiro-Wilk normality test #>  #> data:  g1g2 #> W = 0.99671, p-value = 0.03514 #>  #>  Skewness and kurtosis #>  #> Skewness = -0.1767   SE = 0.0773 z = 2.2851  p = 0.0223 #> Kurtosis = +0.2894   SE = 0.1545 z = 1.8728  p = 0.0611 hist(g1, col=rgb(0.2, 0.2, 0.2, 0.5),      breaks = seq(-5, 3, 0.5), xlab = \"\", main = \"\") hist(g2, col=rgb(0.8, 0.8, 0.8, 0.5), breaks = seq(-5, 3, 0.5), add = TRUE)  title(main = \"Histogram of g1 and g2\", xlab = \"z-score\")"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"the-standard-deviation-and-alternatives","dir":"Articles","previous_headings":"","what":"The standard deviation and alternatives","title":"Distributions of variables and indices","text":"standard deviation sometimes considered useful measure data quality applied z-scores. can use sd() function find standard deviation. example: returns: 1.323469 may produce misleading values applied raw data. procedure applied cleaned data erroneous data flagged records censored. 
SMART guidelines state acceptable range standard deviation weight--height z-scores (whz) 0.8 1.2 flagging criteria applied flagged records censored. Standard deviations outside range considered indicate poor survey quality. Note SMART define threshold anthropometric indices weight--height z-scores. important note standard deviation 1.2 may due sampling mixed population rather due poor data quality. flag column example dataset contains flagging code codes 2, 3, 6, 7 indicate potential problems weight / height. calculate standard deviation whz variable using data records flagging codes censored oedema recorded: ! character specifies logical “”. standard deviation , therefore, calculated using records flag variable contain 2, 3, 6, 7 oedema recorded present. standard deviation whz flagged records oedema cases censored : within SMART acceptable range 0.8 1.2. problem using standard deviation raw data non-robust statistic. means can strongly influenced outliers. example: returns: Adding single outlier (e.g. data entered 7.84 rather 4.78): returns: example single outlier strongly influenced standard deviation. number robust estimators standard deviation. R provides mad() function calculate adjusted median absolute deviation (MAD). median absolute deviation (MAD) defined median absolute deviations median. median absolute values differences individual data points median data: \\[ MAD ~ = ~ ( | x_i ~ - ~ median(x) | ) \\] calculated MAD adjusted make consistent standard deviation: \\[ \\hat{\\sigma} ~ = ~ k ~ \\times ~ MAD  \\] k constant scaling factor, depends upon distribution. normal distribution: \\[ k ~ = ~ 1.4826 \\] mad() function R function returns adjusted MAD: \\[ \\hat{\\sigma} ~ = ~ 1.4826 ~ \\times ~ MAD \\] robust estimate standard deviation. estimator preferred sample taken mixed population (almost always case) distribution “fat” “heavy” tails, case whz variable example dataset. 
Using mad() function raw WHZ data: returns: usually want calculate adjusted MAD whz variable using data records flagging codes relevant whz cases oedema censored: returns: use standard deviation robust equivalents adjusted MAD simple thresholds problematic. Data mixture Gaussians distributions tend large standard deviations even systematic error nothing wrong sample. Checks standard deviation large surveys , therefore, performed smallest spatial strata PSU cluster level. reduces eliminate problem sampling mixed populations. retrieve dataset examine within-strata MADs: file flag.ex03.csv comma-separated-value (CSV) file containing anthropometric data national SMART survey Nigeria. data stored file flag.ex03.csv collected using methods similar MICS DHS surveys. difference survey concentrated anthropometric data children aged 6 59 months. exercise concentrate WHZ. Data stratified region state within region. create new variable combines region state: can examine adjusted MAD whz combination region state survey dataset using: long output can made compact, easier read, easier work : saved mads object can summarised: returns: table can also useful: example adjusted MAD whz variable within limits 0.8 1.2 combinations region state. Note combined region state. avoid potential problems duplicate state names (.e. state name used one region). previous exercise used raw (.e. without flagging) data. better use data records flagging codes relevant whz cases oedema censored. national SMART survey use SMART flagging criteria. use national.SMART() function add SMART flags survey dataset: need exclude records flagging codes relevant whz: Note oedema recorded dataset exclude oedema cases. 
can now calculate MAD whz stratum: saved mads object can summarised: returns: analysis adjusted MAD whz variable within limits 0.8 1.2 combinations region state.","code":"sd(svy$whz) #> [1] 1.323469 sd(svy$whz[!(svy$flag %in% c(2, 3, 6, 7) | svy$oedema == 1)]) #> [1] 1.141944 sd(c(4.55, 5.93, 2.68, 5.61, 3.53, 4.78, 3.60, 5.82, 4.41, 5.42)) #> [1] 1.097533 sd(c(4.55, 5.93, 2.68, 5.61, 3.53, 7.84, 3.60, 5.82, 4.41, 5.42)) #> [1] 1.496963 mad(svy$whz) #> [1] 1.156428 mad(svy$whz[!(svy$flag %in% c(2, 3, 6, 7) | svy$oedema == 1)]) #> [1] 1.097124 svy <- read.table(\"flag.ex03.csv\", header = TRUE, sep = \",\") head(svy) #>   psu region state age sex weight height   haz   waz   whz #> 1   1     SE  Abia  12   2    7.4   72.1 -0.74 -1.58 -1.69 #> 2   1     SE  Abia  33   1   13.3   94.2  0.04 -0.33 -0.52 #> 3   1     SE  Abia  44   2   14.1   98.6 -0.41 -0.63 -0.57 #> 4   1     SE  Abia  40   2   15.8   99.3  0.39  0.59  0.55 #> 5   1     SE  Abia  23   2   10.1   83.9 -0.51 -0.90 -0.92 #> 6   1     SE  Abia  24   1   13.9   88.7  0.52  1.18  1.22 svy$regionState <- paste(svy$region, svy$state, sep = \":\") head(svy) #>   psu region state age sex weight height   haz   waz   whz regionState #> 1   1     SE  Abia  12   2    7.4   72.1 -0.74 -1.58 -1.69     SE:Abia #> 2   1     SE  Abia  33   1   13.3   94.2  0.04 -0.33 -0.52     SE:Abia #> 3   1     SE  Abia  44   2   14.1   98.6 -0.41 -0.63 -0.57     SE:Abia #> 4   1     SE  Abia  40   2   15.8   99.3  0.39  0.59  0.55     SE:Abia #> 5   1     SE  Abia  23   2   10.1   83.9 -0.51 -0.90 -0.92     SE:Abia #> 6   1     SE  Abia  24   1   13.9   88.7  0.52  1.18  1.22     SE:Abia table(svy$regionState) #>  #>       NC:Benue NC:FCT (Abuja)        NC:Kogi       NC:Kwara    NC:Nasarawa  #>            386            363            326            392            430  #>       NC:Niger     NC:Plateau     NE:Adamawa      NE:Bauchi       NE:Borno  #>            589            503            410            804            558  #>       
NE:Gombe      NE:Taraba        NE:Yobe      NW:Jigawa      NW:Kaduna  #>            643            421            689            711            536  #>        NW:Kano     NW:Katsina       NW:Kebbi      NW:Sokoto     NW:Zamfara  #>            671            657            728            646            668  #>        SE:Abia     SE:Anambra      SE:Ebonyi       SE:Enugu         SE:Imo  #>            334            390            455            418            371  #>   SS:Akwa-Ibom     SS:Bayelsa SS:Cross River       SS:Delta         SS:Edo  #>            331            330            376            346            480  #>      SS:Rivers       SW:Ekiti       SW:Lagos        SW:Ogun        SW:Ondo  #>            315            376            640            566            426  #>        SW:Osun         SW:Oyo  #>            435            610 by(svy$whz, svy$regionState, mad, na.rm = TRUE) #> svy$regionState: NC:Benue #> [1] 0.941451 #> ------------------------------------------------------------  #> svy$regionState: NC:FCT (Abuja) #> [1] 0.96369 #> ------------------------------------------------------------  #> svy$regionState: NC:Kogi #> [1] 0.993342 #> ------------------------------------------------------------  #> svy$regionState: NC:Kwara #> [1] 0.993342 #> ------------------------------------------------------------  #> svy$regionState: NC:Nasarawa #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: NC:Niger #> [1] 0.978516 #> ------------------------------------------------------------  #> svy$regionState: NC:Plateau #> [1] 1.022994 #> ------------------------------------------------------------  #> svy$regionState: NE:Adamawa #> [1] 1.045233 #> ------------------------------------------------------------  #> svy$regionState: NE:Bauchi #> [1] 1.18608 #> ------------------------------------------------------------  #> svy$regionState: NE:Borno #> [1] 1.030407 #> 
------------------------------------------------------------  #> svy$regionState: NE:Gombe #> [1] 1.082298 #> ------------------------------------------------------------  #> svy$regionState: NE:Taraba #> [1] 1.008168 #> ------------------------------------------------------------  #> svy$regionState: NE:Yobe #> [1] 1.022994 #> ------------------------------------------------------------  #> svy$regionState: NW:Jigawa #> [1] 1.200906 #> ------------------------------------------------------------  #> svy$regionState: NW:Kaduna #> [1] 0.985929 #> ------------------------------------------------------------  #> svy$regionState: NW:Kano #> [1] 1.156428 #> ------------------------------------------------------------  #> svy$regionState: NW:Katsina #> [1] 1.022994 #> ------------------------------------------------------------  #> svy$regionState: NW:Kebbi #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: NW:Sokoto #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: NW:Zamfara #> [1] 1.052646 #> ------------------------------------------------------------  #> svy$regionState: SE:Abia #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SE:Anambra #> [1] 0.926625 #> ------------------------------------------------------------  #> svy$regionState: SE:Ebonyi #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SE:Enugu #> [1] 0.919212 #> ------------------------------------------------------------  #> svy$regionState: SE:Imo #> [1] 0.88956 #> ------------------------------------------------------------  #> svy$regionState: SS:Akwa-Ibom #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SS:Bayelsa #> [1] 1.11195 #> ------------------------------------------------------------  #> svy$regionState: SS:Cross River #> [1] 0.971103 #> 
------------------------------------------------------------  #> svy$regionState: SS:Delta #> [1] 0.971103 #> ------------------------------------------------------------  #> svy$regionState: SS:Edo #> [1] 0.971103 #> ------------------------------------------------------------  #> svy$regionState: SS:Rivers #> [1] 1.052646 #> ------------------------------------------------------------  #> svy$regionState: SW:Ekiti #> [1] 1.030407 #> ------------------------------------------------------------  #> svy$regionState: SW:Lagos #> [1] 0.837669 #> ------------------------------------------------------------  #> svy$regionState: SW:Ogun #> [1] 0.911799 #> ------------------------------------------------------------  #> svy$regionState: SW:Ondo #> [1] 0.978516 #> ------------------------------------------------------------  #> svy$regionState: SW:Osun #> [1] 0.904386 #> ------------------------------------------------------------  #> svy$regionState: SW:Oyo #> [1] 0.956277 mads <- by(svy$whz, svy$regionState, mad, na.rm = TRUE) mads <- round(mads[1:length(mads)], 2) mads #> svy$regionState #>       NC:Benue NC:FCT (Abuja)        NC:Kogi       NC:Kwara    NC:Nasarawa  #>           0.94           0.96           0.99           0.99           0.93  #>       NC:Niger     NC:Plateau     NE:Adamawa      NE:Bauchi       NE:Borno  #>           0.98           1.02           1.05           1.19           1.03  #>       NE:Gombe      NE:Taraba        NE:Yobe      NW:Jigawa      NW:Kaduna  #>           1.08           1.01           1.02           1.20           0.99  #>        NW:Kano     NW:Katsina       NW:Kebbi      NW:Sokoto     NW:Zamfara  #>           1.16           1.02           0.93           0.93           1.05  #>        SE:Abia     SE:Anambra      SE:Ebonyi       SE:Enugu         SE:Imo  #>           0.90           0.93           0.90           0.92           0.89  #>   SS:Akwa-Ibom     SS:Bayelsa SS:Cross River       SS:Delta         SS:Edo  #>           0.90           
1.11           0.97           0.97           0.97  #>      SS:Rivers       SW:Ekiti       SW:Lagos        SW:Ogun        SW:Ondo  #>           1.05           1.03           0.84           0.91           0.98  #>        SW:Osun         SW:Oyo  #>           0.90           0.96 summary(mads) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>  0.8400  0.9300  0.9800  0.9892  1.0300  1.2000 table(mads) #> mads #> 0.84 0.89  0.9 0.91 0.92 0.93 0.94 0.96 0.97 0.98 0.99 1.01 1.02 1.03 1.05 1.08  #>    1    1    4    1    1    4    1    2    3    2    3    1    3    2    3    1  #> 1.11 1.16 1.19  1.2  #>    1    1    1    1 svyFlagged <- national.SMART(x = svy, strata = \"regionState\") svyFlagged <- svyFlagged[!(svyFlagged$flagSMART %in% c(2, 3, 6, 7)), ] mads <- by(svyFlagged$whz, svyFlagged$regionState, mad, na.rm = TRUE) mads <- round(mads[1:length(mads)], 2) mads #> svyFlagged$regionState #>       NC:Benue NC:FCT (Abuja)        NC:Kogi       NC:Kwara    NC:Nasarawa  #>           0.92           0.95           0.99           0.96           0.92  #>       NC:Niger     NC:Plateau     NE:Adamawa      NE:Bauchi       NE:Borno  #>           0.93           1.02           1.02           1.17           1.02  #>       NE:Gombe      NE:Taraba        NE:Yobe      NW:Jigawa      NW:Kaduna  #>           1.06           0.98           0.99           1.17           0.96  #>        NW:Kano     NW:Katsina       NW:Kebbi      NW:Sokoto     NW:Zamfara  #>           1.10           1.01           0.90           0.90           1.02  #>        SE:Abia     SE:Anambra      SE:Ebonyi       SE:Enugu         SE:Imo  #>           0.87           0.91           0.90           0.90           0.87  #>   SS:Akwa-Ibom     SS:Bayelsa SS:Cross River       SS:Delta         SS:Edo  #>           0.87           1.05           0.92           0.95           0.96  #>      SS:Rivers       SW:Ekiti       SW:Lagos        SW:Ogun        SW:Ondo  #>           1.01           1.01           0.85           0.90        
   0.96  #>        SW:Osun         SW:Oyo  #>           0.89           0.95 summary(mads) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>  0.8500  0.9000  0.9600  0.9665  1.0100  1.1700"},{"path":"https://nutriverse.io/nipnTK/articles/ad.html","id":"measures-of-dispersion","dir":"Articles","previous_headings":"","what":"Measures of dispersion","title":"Distributions of variables and indices","text":"Measures dispersion summarise cases (e.g. children classified wasted, stunted, underweight) distributed across survey’s primary sampling units (e.g. clusters). retrieve survey dataset: file flag.ex01.csv comma-separated-value (CSV) file containing anthropometric data recent SMART survey Sudan. apply flagging criteria data: exclude flagged records: apply case-definition stunted: can examine distribution stunted cases across primary sampling units survey: need counts cases primary sampling unit:  useful keep later use: interested cases distributed across primary sampling units. three general patterns. random, clumped, uniform. can identify pattern example data likely belongs using index dispersion. simplest index dispersion, one used SMART, variance mean ratio: \\[ \\text{Variance mean ratio} ~ = ~ \\frac{s ^ 2}{\\overline{\\chi}} \\] interpretation variance mean ratio straightforward: Variance mean ratio ≈ 1 Random Variance mean ratio > 1 Clumped (.e. clumped random) Variance mean ratio < 1 Uniform (.e. uniform random) value variance mean ratio can range zero (maximum uniformity) total number cases data (maximum clumping). Maximum uniformity found number cases found every primary sampling unit. Maximum clumping found cases found one primary sampling unit. example data: observed variance mean ratio (0.6393127) suggests distribution cases across primary sampling units completely uniform, neither random. formal (Chi-squared) test can performed. 
Chi-squared test statistic can calculated using: returns: 18.54007 critical values test statistic can found using: returns: 16.04707 45.72229 Chi-squared test statistic 16.04707 conclude pattern cases across primary sampling units example data uniform. case example data. Chi-squared test statistic 45.72229 conclude pattern cases across primary sampling units example data clumped. case example data. Since Chi-squared test statistic falls 16.04707 45.72229 conclude pattern cases across primary sampling units example data random. problems variance mean ratio. clearly non-random patterns can produce variance mean ratios one. variance mean ratio also strongly influenced total number cases present data clumping present. better measure Green’s Index Dispersion: \\[ \\text{Green's Index} ~ = ~  \\frac{ \\left ( \\frac{s ^ 2}{\\overline{\\chi}} \\right ) ~ - ~ 1}{n ~ - ~ 1} \\] Green’s Index corrects variance mean ratio total number cases present data. value Green’s Index can range $ -1 / (n - 1) $ maximum uniformity (specific dataset) one maximum clumping. interpretation Green’s Index straightforward: Green’s Index ≈ 0 Random Green’s Index > 0 Clumped (.e. clumped random) Green’s Index < 0 Uniform (.e. uniform random) sampling distribution Green’s Index well described. NiPN data quality toolkit provides greenIndex() function overcomes problem. R language function uses bootstrap technique estimate Green’s Index test whether distribution cases across primary sampling units random. greenIndex() function requires specify name survey dataset, name variable specifying primary sampling unit, name variable specifying case status. example data: returns: point estimate Green’s Index (-0.0013) zero p-value test random distribution cases across primary sampling units (0.0040) 0.05. distribution cases across primary sampling units example data significantly uniform random. can see graphically using:  dashed line plot marks mean number cases found primary sampling unit. 
uniform distribution show bars ending close line (see figure ). SMART uses variance mean ratio test data quality. Green’s Index robust choice can used compare samples vary overall sample size number sampling units used. idea behind using measure dispersion judge data quality belief distribution cases malnutrition across primary sampling units always random. case data considered suspect. problem approach deviations random can reflect true distribution cases survey area. may occur survey area comprises, example, one livelihood zone. also less likely case conditions, wasting oedema, associated infectious disease may clumped randomly distributed across primary sampling units. may become particular problem proximity sampling used collect within-cluster samples. Measures dispersion problematic used measures data quality interpreted caution. exception rule finding maximum, almost maximum, uniformity maximum, almost maximum, clumping. finding maximum uniformity likely data fabricated. finding maximum clumping may indicate poor data collection / poor data management.","code":"svy <- read.table(\"flag.ex01.csv\", header = TRUE, sep = \",\")  head(svy) #>   psu child age sex weight height muac oedema   haz   waz   whz #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82 svy$flag <- 0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6), svy$flag + 1, svy$flag) svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < -5 | svy$whz > 5), svy$flag + 2, svy$flag) svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < -6 | svy$waz > 5), svy$flag + 4, svy$flag) svy <- svy[svy$flag == 0, ] svy$stunted <- ifelse(svy$haz < -2, 1, 2) 
table(svy$psu, svy$stunted) #>      #>       1  2 #>   1   8 20 #>   2  11 14 #>   3   7 22 #>   4   6 23 #>   5   7 15 #>   6  11 20 #>   7  11 14 #>   8  14 12 #>   9  12 18 #>   10 10  9 #>   11 12 16 #>   12  9 13 #>   13  9 13 #>   14  5 21 #>   15 12  9 #>   16  8 17 #>   17  6 23 #>   18  8 21 #>   19 10 12 #>   20  6 20 #>   21 11 18 #>   22 11 14 #>   23 12  6 #>   24  8 15 #>   25 10 19 #>   26 10  8 #>   27 12  9 #>   28  6 14 #>   29 14 10 #>   30 11 18 table(svy$psu, svy$stunted)[,1] #>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  #>  8 11  7  6  7 11 11 14 12 10 12  9  9  5 12  8  6  8 10  6 11 11 12  8 10 10  #> 27 28 29 30  #> 12  6 14 11 barplot(table(svy$psu, svy$stunted)[,1], xlab = \"PSU\", ylab = \"Cases\", cex.names = 0.5) casesPerPSU <- table(svy$psu, svy$stunted)[,1]  casesPerPSU #>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  #>  8 11  7  6  7 11 11 14 12 10 12  9  9  5 12  8  6  8 10  6 11 11 12  8 10 10  #> 27 28 29 30  #> 12  6 14 11 varianceCasesPerPSU <- var(casesPerPSU) meanCasesPerPSU <- sum(casesPerPSU) / length(casesPerPSU)  V2M <- varianceCasesPerPSU / meanCasesPerPSU V2M #> [1] 0.6393127 sum((casesPerPSU - meanCasesPerPSU)^2) / meanCasesPerPSU #> [1] 18.54007 qchisq(p = c(0.025, 0.975), df = length(casesPerPSU) - 1) qchisq(p = c(0.025, 0.975), df = length(casesPerPSU) - 1) greensIndex(data = svy, psu = \"psu\", case = \"stunted\") #>  #>  Green's Index of Dispersion #>  #> Green's Index (GI) of Dispersion  = -0.0013, 95% CI = (-0.0022, -0.0004) #> Maximum uniformity for this data  = -0.0035 #>                          p-value  =  0.0000 table(svy$psu, svy$stunted)[,1] #>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26  #>  8 11  7  6  7 11 11 14 12 10 12  9  9  5 12  8  6  8 10  6 11 11 12  8 10 10  #> 27 28 29 30  #> 12  6 14 11 barplot(table(svy$psu, svy$stunted)[,1], xlab = \"PSU\", ylab = \"Cases\", cex.names = 0.5) abline(h = 
sum(casesPerPSU) / length(casesPerPSU), lty = 2)"},{"path":"https://nutriverse.io/nipnTK/articles/ah.html","id":"summarising-tabulating-and-visualising-age-data","dir":"Articles","previous_headings":"","what":"Summarising, tabulating, and visualising age data","title":"Age heaping","text":"variable interest age (age months): Tables can difficult use ungrouped age data usually many different values: fullTable() function NiPN data-quality toolkit preferred include values zero counts: used fullTable() function returns table containing cells every value specified values parameter. returned table also contain cells values specified values parameter. default values parameter range variable tabulated. means values parameter can sometimes omitted: Omitting values parameter works reliably numeric variables containing whole numbers. variable tabulated character variable numeric variable containing one numbers decimal places specify values parameter. graphical analysis usually informative tabular analysis:  expect ages present roughly equal frequency frequency reducing slowly age due mortality. can see marked age-heaping 12, 18, 24, 30, 36, 48 months (see figure ). common age reported mothers. tendency mothers carers round ages whole years half years. Note used values = 6:59 fullTable() function NiPN data quality toolkit. range values present age variable.","code":"summary(svy$age) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>     6.0    18.0    30.0    30.4    42.0    59.0 table(svy$age) #>  #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 fullTable(svy$age, values = 6:59) #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 fullTable(svy$age) #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 barplot(fullTable(svy$age, values = 6:59),          xlab = \"Age (months)\", ylab = \"Frequency\", las = 3, cex.names = 0.6)"},{"path":"https://nutriverse.io/nipnTK/articles/ah.html","id":"age-heaping-in-children","dir":"Articles","previous_headings":"","what":"Age heaping in children","title":"Age heaping","text":"Age heaping can seriously affect survey results indices include age component (e.g. height- -age weight-age). effect important systematic rounding systematic rounding . Systematic rounding can lead bias. rounding systematically indices biased upwards prevalence biased downwards. rounding systematically indices biased downwards prevalence biased upwards. useful way looking age heaping age recorded months examine remainders ages divided 12. 
R language provides special operator (%%) help :  NiPN data quality toolkit provides R language function called ageHeaping() performs age-heaping analysis. Applying function example data: returns: output ageHeaping() function can saved later use: saved output contains Chi-squared test frequency tables final digits (counts percentages). can accessed using: saved results may also plotted: resulting plot shown .  ageHeaping() function assumes want examine remainder dividing twelve. useful working ages recorded months. may also useful use divisors, examining remainder dividing six:  shows extent age heaping whole half-years (see figure ).","code":"rem <- svy$age %% 12 remTable <- fullTable(rem, values = 0:11) remTable #>   0   1   2   3   4   5   6   7   8   9  10  11  #> 170  33  46  41  46  48 105  63  83  72  90  76 prop.table(remTable) * 100 #>         0         1         2         3         4         5         6         7  #> 19.473081  3.780069  5.269187  4.696449  5.269187  5.498282 12.027491  7.216495  #>         8         9        10        11  #>  9.507446  8.247423 10.309278  8.705613 barplot(remTable, xlab = \"Age (months) %% 12\", ylab = \"Frequency\")  abline(h = sum(remTable / 12), lty = 3) chisq.test(remTable) #>  #>  Chi-squared test for given probabilities #>  #> data:  remTable #> X-squared = 214.96, df = 11, p-value < 2.2e-16 ageHeaping(svy$age) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 ah12 <- ageHeaping(svy$age) ah12 #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 ah12$X2 #> X-squared  #>  214.9588 ah12$df #> df  #> 11 ah12$p #> [1] 5.791598e-40 ah12$tab #> Remainder of svy$age / 12 #>   0   1   2   3   4   5   6   7   8   9  10  11  #> 170  33  46  41  46  48 105  63  83  72  90  76 ah12$pct #> Remainder of svy$age / 12 #>    0    1    2    3    4    5    6    7    8    9   10   11  #> 19.5  3.8 
 5.3  4.7  5.3  5.5 12.0  7.2  9.5  8.2 10.3  8.7 plot(ah12, main = \"Age-heaping (remainder of age / 12)\") ah6 <- ageHeaping(svy$age, divisor = 6)  print(ah6) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 6 #> X-squared = 145.0275, df = 5, p-value = 0.0000 plot(ah6)"},{"path":"https://nutriverse.io/nipnTK/articles/ah.html","id":"age-heaping-in-adults","dir":"Articles","previous_headings":"","what":"Age heaping in adults","title":"Age heaping","text":"Using ten five divisors can useful dealing data adults ages recorded whole years. example: file ah.ex01.csv comma-separated-value (CSV) file containing anthropometric data Rapid Assessment Method Older People (RAM-OP) survey Dadaab refugee camp Garissa, Kenya. survey people aged sixty years older. variable interest age (age years): Care exercised specifying divisor use analysis age heaping. calendars use base ten. Amongst Han Chinese, example, age heaping may occur twelve-year cycle corresponding preferred animal years Chinese calendar. analysis age heaping concentrates specific digits (e.g. zero five) decimal intervals appropriate populations. advisable, therefore use simple tabulation visualisation techniques heap decide appropriate divisor. example data:  shows age-heaping decades half-decades (see figure ). survey using divisor 10 appropriate:  pronounced age heaping decades , lesser extent, half-decades data (see figure ). may also useful use divisors, examining remainder dividing five:  shows extent age heaping whole half decades (see figure ).","code":"svy <- read.table(\"ah.ex01.csv\", header = TRUE, sep = \",\")  head(svy) svy <- ah.ex01  head(svy) summary(svy$age) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>     6.0    18.0    30.0    30.4    42.0    59.0 summary(svy$age) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>     6.0    18.0    30.0    30.4    42.0    59.0 fullTable(svy$age) #>  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31  #> 10 17 25 13 19 23 38 11 11 17  9 14 26  9 17 14 24 12 31  8 13  9 21 14 38 14  #> 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  #> 16 23 22 18 57  8 13  9 11 12 19 10 13 14 12 14 44  6  9  6  5  8 12 13 12  8  #> 58 59  #> 13  9 barplot(fullTable(svy$age),          xlab = \"Age (years)\", ylab = \"Frequency\", las = 3, cex.names = 0.6) ah10 <- ageHeaping(svy$age, divisor = 10)  print(ah10) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 10 #> X-squared = 70.31042, df = 9, p-value = 0.0000 plot(ah10) ah5 <- ageHeaping(svy$age, divisor = 5)  print(ah5) #>  #>  Age-heaping Analysis #>  #> data:    Remainder of svy$age / 5 #> X-squared = 10.39633, df = 4, p-value = 0.0343 plot(ah5)"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"age-and-sex-distributions-childrens-data","dir":"Articles","previous_headings":"","what":"Age and sex distributions (children’s data)","title":"Age and sex distributions","text":"Age heaping tendency report children’s ages nearest year adult ages nearest multiple 5 10 years. Age heaping common. major reason data nutritional anthropometry surveys often analysed reported using broad age-groups. commonest age-groups used children’s data 6 17 months, 18 29 months, 30 41 months, 42 53 months, 54 59 months (see figure ). known year-centred age-groups. Note last age-group covers six months nominally centred five years. age-groups may used specific analyses. techniques presented can adapted work age- groups.  
retrieve survey dataset: dataset dp.ex02 comma-separated-value (CSV) file containing anthropometric data SMART survey Kabul, Afghanistan.","code":"svy <- read.table(\"dp.ex02.csv\", header = TRUE, sep = \",\")  head(svy) #>   psu age sex weight height muac oedema #> 1   1   6   1    7.3   65.0  146      2 #> 2   1  42   2   12.5   89.5  156      2 #> 3   1  23   1   10.6   78.1  149      2 #> 4   1  18   1   12.8   81.5  160      2 #> 5   1  52   1   12.1   87.3  152      2 #> 6   1  36   2   16.9   93.0  190      2"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"tabulation-and-visualisation","dir":"Articles","previous_headings":"Age and sex distributions (children’s data)","what":"Tabulation and visualisation","title":"Age and sex distributions","text":"NiPN data quality toolkit provides R language function called recode() makes easy recode group data. use recode() function group data age variable (age months) year-centred age-groups. tabular analysis can performed: table() function performs cross-tabulation. first variable specified (svy$ycag example) row variable. second variable specified (svy$sex example) column variable.  useful examine row percentages column percentages tables age-group sex. look row percentages: returns: shows approximately equal proportions males females year-centred age-group. specified margin = 1 prop.table() function wanted row percentages. also look column percentages: returns: expect approximately equal proportions children age-groups centred 1, 2, 3, 4 years smaller proportion (.e. half age-groups) age-group centred 5 years. specified margin = 2 prop.table() function wanted column percentages. graphical analysis using population pyramid can useful. 
NiPN data quality toolkit provides R language function called pyramid.plot() plotting population pyramids:  can make informative plot specifying title axis labels:  applying shading:  colours:  expect approximately equal numbers children age-groups centred 1, 2, 3, 4 years smaller number (.e. half number age-groups) age-group centred 5 years. also approximately equal numbers males females. see population pyramid .  pyramid.plot() function uses values grouped age variable y-axis value labels. can assign descriptive text values using recode() function. example:  can also use factor type variable. type variable allows labels specified:  cut() function may also used:  cut() function versatile grouping function. explained detail later section. cex.names parameter pyramid.plot() function allows us change size value labels y-axis. value cex.names magnification factor. Values one make labels larger default. Values one make labels smaller default.","code":"svy$ycag <- recode(svy$age, \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\") head(svy) #>   psu age sex weight height muac oedema ycag #> 1   1   6   1    7.3   65.0  146      2    1 #> 2   1  42   2   12.5   89.5  156      2    4 #> 3   1  23   1   10.6   78.1  149      2    2 #> 4   1  18   1   12.8   81.5  160      2    2 #> 5   1  52   1   12.1   87.3  152      2    4 #> 6   1  36   2   16.9   93.0  190      2    3 table(svy$ycag, svy$sex)  #>     #>       1   2 #>   1 101 106 #>   2 102  96 #>   3 126 115 #>   4  78  82 #>   5  31  36 prop.table(table(svy$ycag, svy$sex)) * 100 #>     #>             1         2 #>   1 11.569301 12.142039 #>   2 11.683849 10.996564 #>   3 14.432990 13.172967 #>   4  8.934708  9.392898 #>   5  3.550974  4.123711 prop.table(table(svy$ycag, svy$sex), margin = 1) * 100 #>     #>            1        2 #>   1 48.79227 51.20773 #>   2 51.51515 48.48485 #>   3 52.28216 47.71784 #>   4 48.75000 51.25000 #>   5 46.26866 53.73134 prop.table(table(svy$ycag, svy$sex), margin = 2) * 100 #>     #>     
        1         2 #>   1 23.059361 24.367816 #>   2 23.287671 22.068966 #>   3 28.767123 26.436782 #>   4 17.808219 18.850575 #>   5  7.077626  8.275862 pyramid.plot(svy$ycag, svy$sex) pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\") pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\",              col = c(\"grey80\", \"white\")) pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\",              col = c(\"lightblue\", \"pink\")) pyramid.plot(svy$ycag, svy$sex,               main = \"Distribution of age by sex\",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\") svy$ageLabel <- recode(svy$age, \"6:29='< 30 months'; 30:hi='30 month or older'\") #> Warning in recode(svy$age, \"6:29='< 30 months'; 30:hi='30 month or older'\"): NAs #> introduced by coercion  pyramid.plot(svy$ageLabel,               svy$sex,               main = \"Distribution of age by sex\",               xlab = \"Frequency (Males | Females)\",               ylab = \"Age-group\") svy$ageLabel <- factor(svy$ycag,                        labels = c(\"6:17\", \"18:29\", \"30:41\", \"42:53\", \"54:59\"))  pyramid.plot(svy$ageLabel,               svy$sex,               main = \"Distribution of age by sex\",               xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred age-group\") svy$ageCuts <- cut(svy$age, breaks = c(0, 17, 29, 41, 53, 59))  pyramid.plot(svy$ageCuts,               svy$sex,               main = \"Age-group (months) \",              xlab = \"Frequency (Males | Females)\",               ylab = \"Year-centred 
age-group\",              cex.names = 0.9)"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"simple-testing","dir":"Articles","previous_headings":"","what":"Simple testing","title":"Age and sex distributions","text":"possible perform formal test distribution age-groups sex. simple test : yields: example p-value 0.05 accept null hypothesis significant association age sex. important test tests whether distribution ages similar males females. , however, test whether age structure sample meets expectations. requires test compares observed numbers expected numbers derived external source (e.g. census data) demographic model.","code":"chisq.test(table(svy$ycag, svy$sex)) #>  #>  Pearson's Chi-squared test #>  #> data:  table(svy$ycag, svy$sex) #> X-squared = 1.2675, df = 4, p-value = 0.8669"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"a-model-of-the-expected-age-structure","dir":"Articles","previous_headings":"Simple testing","what":"A model of the expected age structure","title":"Age and sex distributions","text":"simple model-based method calculating expected numbers exponential decay population births deaths balance 1:1 male female sex ratio. model proportion surviving group year can calculated : \\[ p ~ = ~ e ^ {-zt} \\] e base natural logarithm (approximately 2.7183), z mortality rate associated time period, t time. Time (t) starts zero purposes computation. Age can used measure time since birth. use 0 first year-centred age-group, 1 second year-centred age-group, -. rationale us using t <- 0:4 . five year-centred age-groups mortality rate 1 / 10,000 / day, expected proportions surviving year can calculated : yields following survival probabilities: need specify duration (.e. number years) represented age-group: can calculate expected proportions children age-group: gives: can now calculate expected numbers: giving: formal test compare observed numbers expected numbers. 
observed numbers can found using: gives: can useful examine observed expected numbers graphically:  calculate Chi-squared test statistic: \\[ \\chi ^ 2 ~ = ~ \\sum \\frac{(\\text{observed} - \\text{expected}) ^ 2}{\\text{expected}} \\] using: yields Chi-Squared test statistic : can find p-value using: gives: example age distribution significantly different expected numbers calculated using simple demographic model. Note specify degrees freedom (df) Chi-Squared test number age-groups minus one. five age-groups specify df = 4. degrees freedom (df) need specify depend number age-groups use. always number age-groups minus one. , example, ten age-groups need specify df = 9. NiPN data quality toolkit provides R function called ageChildren() performs model- based Chi-Squared test: returns: Note specified five years mortality rate 1 / 10,000 / day using u5mr = 1. Another, appropriate, rate may specified. ageChildren() function calculates year-centred age-groups children aged six fifty-nine months default. standard survey population used SMART many surveys. use year-centred age-groups also standard practice. commands given can, however, adapted use different age-groups. output ageChildren() function can saved later use: saved output contains Chi-squared test results tables observed expected values. can accessed using: saved results may also plotted:  ageChildren() function can applied sex separately. males:  females:  easier way : test statistics interpreted caution. significant test result may, example, due use inappropriate model generate expected numbers. significant result particular test may due : Specifying inappropriate five years mortality rate: particular problem specified five years mortality rate assumed applied five years prior data collected. assumption 1:1 male female sex ratio: particular problem setting sex-selective abortion sex-selective infanticide. model crude. Mortality related age. 
Younger children greater mortality risk older children average five years mortality rate used. sophisticated model used , many settings, data required use model. also noted sample sizes used survey can cause tests yield statistically significant results small differences observed expected numbers.","code":"z <- (1 / 10000) * 365.25   t <- 0:4  p <- exp(-z * t)  p z <- (1 / 10000) * 365.25   t <- 0:4  p <- exp(-z * t)  p #> [1] 1.0000000 0.9641340 0.9295544 0.8962149 0.8640713 d <- c(1, 1, 1, 1, 0.5) ep <- d * p / sum(d * p)   ep #> [1] 0.2368580 0.2283628 0.2201724 0.2122757 0.1023311 expected <- ep * sum(table(svy$ycag)) names(expected) <- 1:5  expected #>         1         2         3         4         5  #> 206.77703 199.36076 192.21049 185.31667  89.33505 observed <- table(svy$ycag)   observed #>  #>   1   2   3   4   5  #> 207 198 241 160  67 par(mfcol = c(1, 2)) barplot(observed, main = \"Observed\", xlab = \"Age group\", ylab = \"Frequency\", ylim = c(0, 250)) barplot(expected, main = \"Expected\", xlab = \"Age group\", ylab = \"Frequency\", ylim = c(0, 250)) X2 <- sum((observed - expected) ^ 2 / expected) pchisq(X2, df = 4, lower.tail = FALSE) #> [1] 0.000259395 ageChildren(svy$age, u5mr = 1) #>  #>  Age Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003 ac <- ageChildren(svy$age, u5mr = 1) ac #>  #>  Age Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003  ac$X2 #> [1] 21.43662  ac$df #> [1] 4  ac$p  #> [1] 0.000259395  ac$observed  #>   1   2   3   4   5  #> 207 198 241 160  67  ac$expected #>         1         2         3         4         5  #> 206.77703 199.36076 192.21049 185.31667  89.33505 plot(ac) acM <- ageChildren(svy$age[svy$sex == 1], u5mr = 1)   acM #>  #>  Age Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032  plot(acM) acF <- ageChildren(svy$age[svy$sex == 2], u5mr = 1)   acF #>  #>  Age Test (Children) #>  #> X-squared = 6.8429, df = 4, p = 0.1444  plot(acF) by(svy$age, svy$sex, ageChildren, u5mr = 1) #> 
svy$sex: 1 #>  #>  Age Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #>  Age Test (Children) #>  #> X-squared = 6.8429, df = 4, p = 0.1444"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"use-of-census-data","dir":"Articles","previous_headings":"","what":"Use of census data","title":"Age and sex distributions","text":"use simple demographic models far ideal. usually better calculate expected proportions census data. useful source census data United States Census Bureau’s International Data Base: https://www.census.gov/data-tools/demo/idb/informationGateway.php population single year age-groups 0, 1, 2, 3, 4 years Afghanistan 2015 : can calculate expected values data: sample size \\(n = 900\\) expected number age-group : expected values can used Chi-squared test illustrated . Census data may also used estimate five years’ mortality rate (U5MR) can used ageChildren() function. model exponential decay population births deaths balance 1:1 male female sex ratio: \\[ p ~ = ~ e ^ {-zt} \\] means can, given age-distribution, estimate mortality fitting model: \\[ \\log_e(n) ~ = ~ \\alpha ~ + ~ \\beta t \\] \\(n\\) count children age-group. absolute value β coefficient point estimate mortality rate (z). Using 2015 population data Afghanistan: gives: value reported t \\(\\beta\\) coefficient (-0.04571). absolute value \\(\\beta\\) coefficient (.e. value without sign) 0.04571. point estimate mortality rate. 
Expressed number deaths / 10,000 persons / day: : can use estimate ageChildren() function:","code":"pop <- c(1148379, 1062635, 1015688, 981288, 950875)  ep <- pop / sum(pop) expected <- ep * 900 expected #> [1] 200.3427 185.3841 177.1939 171.1925 165.8868 t <- 0:4  lm(log(pop) ~ t) #>  #> Call: #> lm(formula = log(pop) ~ t) #>  #> Coefficients: #> (Intercept)            t   #>    13.93601     -0.04571 (0.04571 / 365.25) * 10000 #> [1] 1.251472 ageChildren(svy$age, u5mr = 1.251472) #>  #>  Age Test (Children) #>  #> X-squared = 20.4744, df = 4, p = 0.0004"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"the-age-ratio","dir":"Articles","previous_headings":"Use of census data","what":"The age ratio","title":"Age and sex distributions","text":"much simpler less problematic age-related test survey data quality age ratio test. age ratio defined : \\[ \\text{Age ratio} ~ = ~ \\frac{\\text{number children aged 6 29 months}}{\\text{number children aged 30 59 months}} \\] use recode() function NiPN data quality toolkit create relevant age-groups: observed age ratio : gives: often easier work proportions ratios need calculate proportion younger age-group: gives: can calculate expected value using census data simple demographic model. simplest approach use standard value. SMART surveys often use ratio 0.85:1. need calculate expected proportion younger group. ratio 0.85:1 : gives: observed proportion (0.4639175) expected proportion (0.4594595) similar formal test statistical significance required case. Formal testing can done using Chi-squared test: returns: age ratio example data significantly different expected age ratio. NiPN data quality toolkit provide R function called ageRatioTest() performs age ratio test: returns: ratio parameter ageRatioTest() function allows specify expected age ratio 0.85:1. Note ageRatioTest() function applies test data children aged 6 59 months (ages ignored). 
age ratio test might applied data sexes () sex separately: example data meets expectations regarding age ratio children male female children separately.","code":"svy$ageGroup <- recode(svy$age, \"6:29=1; 30:59=2\") head(svy) #>   psu age sex weight height muac oedema ycag ageLabel ageCuts ageGroup #> 1   1   6   1    7.3   65.0  146      2    1     6:17  (0,17]        1 #> 2   1  42   2   12.5   89.5  156      2    4    42:53 (41,53]        2 #> 3   1  23   1   10.6   78.1  149      2    2    18:29 (17,29]        1 #> 4   1  18   1   12.8   81.5  160      2    2    18:29 (17,29]        1 #> 5   1  52   1   12.1   87.3  152      2    4    42:53 (41,53]        2 #> 6   1  36   2   16.9   93.0  190      2    3    30:41 (29,41]        2 sum(svy$ageGroup == 1) / sum(svy$ageGroup == 2) #> [1] 0.8653846 sum(svy$ageGroup == 1) / sum(table(svy$ageGroup)) #> [1] 0.4639175 p <- 0.85 / (0.85 + 1) #> [1] 0.4594595 prop.test(sum(svy$ageGroup == 1), sum(table(svy$ageGroup)), p = 0.4594595) #>  #>  1-sample proportions test with continuity correction #>  #> data:  sum(svy$ageGroup == 1) out of sum(table(svy$ageGroup)), null probability 0.4594595 #> X-squared = 0.053062, df = 1, p-value = 0.8178 #> alternative hypothesis: true p is not equal to 0.4594595 #> 95 percent confidence interval: #>  0.4304994 0.4976573 #> sample estimates: #>         p  #> 0.4639175 ageRatioTest(svy$age, ratio = 0.85) #>  #>      Age Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8654 #> Observed proportion aged 6 - 29 months = 0.4639 #>  #> X-squared = 0.0531, p = 0.8178 by(svy$age, svy$sex, ageRatioTest, ratio = 0.85) #> svy$sex: 1 #>  #>      Age Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8638 #> Observed proportion aged 6 - 29 
months = 0.4635 #>  #> X-squared = 0.0145, p = 0.9041 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #>      Age Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8670 #> Observed proportion aged 6 - 29 months = 0.4644 #>  #> X-squared = 0.0247, p = 0.8750"},{"path":"https://nutriverse.io/nipnTK/articles/as.html","id":"age-and-sex-distributions-adults-and-general-population-surveys","dir":"Articles","previous_headings":"","what":"Age and sex distributions : Adults and general population surveys","title":"Age and sex distributions","text":"key test survey quality whether survey data represents population terms age sex distribution. can test comparison census data. retrieve example data: data taken household rosters collected part household survey Tanzania. use census data taken Wolfram|Alpha knowledge engine: http://www.wolframalpha.com/input/?=Tanzania+age+distribution Another useful source census data United States Census Bureau’s International Data Base: https://www.census.gov/data-tools/demo/idb/informationGateway.php pyramid plot produced Wolfram|Alpha shown figure .  table produced Wolfram|Alpha downloaded stored CSV file: age-groups expressed using form specified ISO 31-11, international standard applies mathematical symbols. form [,b) expresses interval \\(≤ x < b\\). example, [30,35) used indicate set {30, 31, 32, 33, 34} ages years. form [,b) said closed left open right. reference data (ref) uses five-year age-groups. create age-groups example dataset. first check range ages example data: returns: R language provides function makes easy create ISO 31-11 groupings raw data: Using include.lowest = TRUE tells cut() function include lowest breaks value (zero case). Using right = FALSE tells cut() function use groupings closed left. 
combination parameters creates “closed left” “open right” age-groups used reference (ref) data: tabular analysis age-group sex can produced using: visual inspection useful:  can make easier read:  Note specified ylab = \"\" clear category labels represent age-groups prevent y-axis label obscuring category labels, happens :  possible alter number lines text margins plot, reduce size age-group labels, place y-axis label specific line left margin plot order make clearer plot:  easiest way checking whether survey data represents general population terms age sex distribution compare observed (figure right) expected (figure left) distributions.  general shapes two distributions similar. lumpiness figure right due age heaping adult ages decades half-decades:  formal test age structure can made comparing observed expected numbers. can graphically:  observed expected numbers similar . lumpiness observed numbers due age heaping. See Figure ASA04. Formal testing can performed: gives: warning due small expected numbers (.e. n < 5) older age-groups. R provides robust “Monte Carlo” test: may take seconds compute yields: test results need interpreted caution. sample size (\\(n = 8736\\)) large example. means small differences, may due age heaping, become statistically significant. test considered good evidence age-structure sample differs expected age-structure population. also need examine sex ratio sample. sex ratio test can performed using sexRatioTest() function NiPN data quality toolkit sex ratio observed census data: yields: evidence sex ratio sample differs much expected sex ratio population. techniques outlined section illustrative. many surveys, nutritional anthropometry surveys young children, standardised. survey may sample women child-bearing age. sample may restricted women aged 15 45 years. case age-structure can examined using techniques outlined make sense examine sex ratio. 
Care taken examining data surveys may deliberately oversampled specific age-groups.","code":"svy <- read.table(\"as.ex01.csv\", header = TRUE, sep = \",\")  head(svy) #>   age sex #> 1  44   2 #> 2   1   2 #> 3  15   2 #> 4   7   1 #> 5  14   1 #> 6  14   1 ref <- read.table(\"as.ex02.csv\", header = TRUE, sep = \",\") ref #>         age   Males Females     All #> 1     [0,5) 4043000 3969000 8012000 #> 2    [5,10) 3336000 3284000 6620000 #> 3   [10,15) 2775000 2742000 5517000 #> 4   [15,20) 2386000 2372000 4758000 #> 5   [20,25) 2076000 2073000 4149000 #> 6   [25,30) 1753000 1750000 3503000 #> 7   [30,35) 1453000 1432000 2885000 #> 8   [35,40) 1142000 1099000 2241000 #> 9   [40,45)  873000  846000 1719000 #> 10  [45,50)  673000  699000 1372000 #> 11  [50,55)  538000  601000 1139000 #> 12  [55,60)  433000  503000  936000 #> 13  [60,65)  357000  426000  783000 #> 14  [65,70)  266000  319000  585000 #> 15  [70,75)  182000  222000  404000 #> 16  [75,80)  108000  137000  245000 #> 17  [80,85)   51000   68000  119000 #> 18  [85,90)   17000   25000   42000 #> 19  [90,95)    3000    6000    9000 #> 20 [95,100)       0    1000    1000 range(svy$age) #> [1]  0 93 svy$ageGroup <-cut(svy$age,                     breaks = seq(from = 0, to = 95, by = 5),                    include.lowest = TRUE, right = FALSE) table(svy$ageGroup) #>  #>   [0,5)  [5,10) [10,15) [15,20) [20,25) [25,30) [30,35) [35,40) [40,45) [45,50)  #>    1598    1268    1072     808     870     575     580     385     424     258  #> [50,55) [55,60) [60,65) [65,70) [70,75) [75,80) [80,85) [85,90) [90,95]  #>     284     128     165      82      98      51      60      18      12 table(svy$ageGroup, svy$sex) #>           #>             1   2 #>   [0,5)   821 777 #>   [5,10)  637 631 #>   [10,15) 547 525 #>   [15,20) 389 419 #>   [20,25) 342 528 #>   [25,30) 343 232 #>   [30,35) 250 330 #>   [35,40) 177 208 #>   [40,45) 206 218 #>   [45,50) 125 133 #>   [50,55) 162 122 #>   [55,60)  70  58 #>   [60,65)  87  78 #> 
  [65,70)  33  49 #>   [70,75)  47  51 #>   [75,80)  22  29 #>   [80,85)  24  36 #>   [85,90)  10   8 #>   [90,95]   1  11 pyramid.plot(svy$ageGroup, svy$sex) pyramid.plot(svy$ageGroup,               svy$sex,               main = \"Age-group by sex\",              xlab = \"Number (Males | Females)\",               ylab = \"\",               las = 1,               cex.names = 0.9) pyramid.plot(svy$ageGroup,               svy$sex,               main = \"Age-group by sex\",              xlab = \"Number (Males | Females)\",               ylab = \"Age-group\",               las = 1,              cex.names = 0.9) par(mar = c(5, 5, 4, 2))  pyramid.plot(svy$ageGroup,               svy$sex,               main = \"Age-group by sex\",              xlab = \"Number (Males | Females)\",               ylab = \"\",               las = 1,               cex.names = 0.8)  title(ylab = \"Age-group\", line = 4) ah <- ageHeaping(svy$age, divisor = 10)  plot(ah, main = \"Remainder of age / 10\") ref <- ref[1:19, ]  expectedProportions <- ref$All / sum(ref$All) expectedNumbers <- expectedProportions * sum(table(svy$ageGroup))  mp <- barplot(table(svy$ageGroup),                main = \"Observed and expected numbers\",                ylim = c(0, max(expectedNumbers)),                las = 2)  lines(mp, expectedNumbers, lty = 2, lwd = 2) chisq.test(table(svy$ageGroup),             p = expectedProportions) #> Warning in chisq.test(table(svy$ageGroup), p = expectedProportions): Chi-squared #> approximation may be incorrect #>  #>  Chi-squared test for given probabilities #>  #> data:  table(svy$ageGroup) #> X-squared = 248.41, df = 18, p-value < 2.2e-16 chisq.test(table(svy$ageGroup),             p = expectedProportions,             simulate.p.value = TRUE) #>  #>  Chi-squared test for given probabilities with simulated p-value (based #>  on 2000 replicates) #>  #> data:  table(svy$ageGroup) #> X-squared = 248.41, df = NA, p-value = 0.0004998 censusM <- sum(ref$Males) censusF <- 
sum(ref$Females)  sexRatioTest(svy$sex,               codes = c(1, 2),               pop = c(censusM, censusF)) #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.4988 #> Observed proportion male = 0.4914 #> X-squared = 1.8770, p = 0.1707"},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"digit-preference-in-anthropometric-measurements","dir":"Articles","previous_headings":"","what":"Digit preference in anthropometric measurements","title":"Digit preference","text":"Measurements nutritional anthropometry surveys usually taken recorded one decimal place. Examples given table . Common measurements used anthropometric surveys Digit preference observation final number measurement occurs greater frequency expected chance. can occur rounding, practice increasing decreasing value measurement nearest whole half unit, data made . taking recording measurements field common field staff round first value decimal point zero five. Measurements whole numbers may also rounded nearest decade (e.g. 137 mm may rounded 140 mm) half-decade (e.g. 137 mm may rounded 135 mm). small number rounded measurements unlikely affect survey results. large number rounded measurements can affect survey results particularly measurements systematically rounded one direction. form bias. Fictitious data often shows digit preference (e.g.) ”2” “6” appearing final digits much frequently expected. happens , without using computer, large quantity random data much harder fake merely random-looking data. little digit preference anthropometric data expect final recorded digit measurement occur approximately equal frequency. can check digit preference absent data testing whether case. 
use R Language Data Analysis Graphics illustrate can done.","code":""},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"tabulation-and-visualisation","dir":"Articles","previous_headings":"","what":"Tabulation and visualisation","title":"Digit preference","text":"First work artificial data: use set.seed() resets pseudorandom number generator. ensures results shown get follow example analyses. always examine data performing formal tests. table can useful: returns: can look proportions instead counts: returns: prefer working percentages : returns: Examining data graphically useful: can add line showing expectation final digit occur 10% time: resulting plot shown .  tabular graphical analyses consistent little digit preference generated data. analyses agree expectation final digit occur 10% time. seeing random variation. can use formal test confirm : returns: example p-value 0.05 accept null hypothesis digit preference. important check digit zero nine represented tables plots. Missing digits can indicate strong digit preference. NiPN data quality toolkit provides fullTable() function. R language function produces table includes cells zero counts. example remove values final digit equal 6 generated data: see effect:  misleading analysis. easy miss final digits equal 6 data. plot misleading final digit 6 represented assumed ten rather nine final digits calculated expected frequencies. Chi-squared test correct account zero cases final digit equal 6. fullTable() function avoids issues:  Chi-squared test (incorrectly) calculated without zero cell: indicates problem data. chi-square test (correctly) calculated zero cell: indicates problem data. Note use sum(fullTable(finalDigits)) / 10 (.e. divide ten) know ten final digits (.e. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9). issue using hypothesis test chi-squared test. Test values strongly influenced sample size yielding false-negative results used small sample sizes false-positive results used large sample sizes. 
can illustrate generating new artificial data marked digit preference: creates table object containing counts imaginary final digits. Looking data:  marked digit preference zero five (see figure ). Chi-squared test: returns: example Chi-squared test failed detect marked digit preference. false negative test result. failure Chi-squared test example due small number observations (.e. n = 60) used analysis. tabular graphical analysis required identify digit preference problem example. usually working large sample sizes. can bring problem false positives. generate data: data approximate properties set true uniformly random numbers. digit preference might observe data due solely chance. generated data appear exhibit digit preference:  digit preference especially marked. Chi-squared test: yields: suggests significant digit preference. false positive result generated data constrained uniformly random digit preference observed due solely chance. failure Chi-squared test example due test mistaking random variation digit preference , part, due use large (.e. \\(n ~ = ~ 1000\\)) number observations. also important note test p < 0.05 significance threshold generate positive result 1 20 tests data exhibiting nothing random variation. 
tests p < 0.05 significance threshold 5% false positive rate.","code":"set.seed(0) finalDigits <- sample(x = 0:9, size = 1000, replace = TRUE) table(finalDigits) #> finalDigits #>   0   1   2   3   4   5   6   7   8   9  #>  95  80  96 102 106  98 109  95 109 110 prop.table(table(finalDigits)) #> finalDigits #>     0     1     2     3     4     5     6     7     8     9  #> 0.095 0.080 0.096 0.102 0.106 0.098 0.109 0.095 0.109 0.110 prop.table(table(finalDigits)) * 100 #> finalDigits #>    0    1    2    3    4    5    6    7    8    9  #>  9.5  8.0  9.6 10.2 10.6  9.8 10.9  9.5 10.9 11.0 barplot(table(finalDigits), xlab = \"Final digit\", ylab = \"Frequency\") abline(h = sum(table(finalDigits)) / 10, lty = 3) chisq.test(table(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  table(finalDigits) #> X-squared = 7.72, df = 9, p-value = 0.5626 finalDigits[finalDigits == 6] <- NA table(finalDigits) #> finalDigits #>   0   1   2   3   4   5   7   8   9  #>  95  80  96 102 106  98  95 109 110 prop.table(table(finalDigits)) * 100 #> finalDigits #>         0         1         2         3         4         5         7         8  #> 10.662177  8.978676 10.774411 11.447811 11.896745 10.998878 10.662177 12.233446  #>         9  #> 12.345679 barplot(table(finalDigits), xlab = \"Final digit\", ylab = \"Frequency\")  abline(h = sum(table(finalDigits)) / 10, lty = 3) chisq.test(table(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  table(finalDigits) #> X-squared = 6.8889, df = 8, p-value = 0.5487 fullTable(finalDigits) #>   0   1   2   3   4   5   6   7   8   9  #>  95  80  96 102 106  98   0  95 109 110 prop.table(fullTable(finalDigits)) * 100 #>         0         1         2         3         4         5         6         7  #> 10.662177  8.978676 10.774411 11.447811 11.896745 10.998878  0.000000 10.662177  #>         8         9  #> 12.233446 12.345679 barplot(fullTable(finalDigits), xlab = \"Final digit\", ylab = 
\"Frequency\")  abline(h = sum(fullTable(finalDigits)) / 10, lty = 3) chisq.test(fullTable(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  fullTable(finalDigits) #> X-squared = 106.65, df = 9, p-value < 2.2e-16 #>  #>  Chi-squared test for given probabilities #>  #> data:  table(finalDigits) #> X-squared = 6.8889, df = 8, p-value = 0.5487 #>  #>  Chi-squared test for given probabilities #>  #> data:  fullTable(finalDigits) #> X-squared = 106.65, df = 9, p-value < 2.2e-16 finalDigits <- as.table(x = c(11, 7, 5, 4, 7, 11, 5, 4, 4, 2))  names(finalDigits) <- 0:9 finalDigits #>  0  1  2  3  4  5  6  7  8  9  #> 11  7  5  4  7 11  5  4  4  2 prop.table(finalDigits) * 100 #>         0         1         2         3         4         5         6         7  #> 18.333333 11.666667  8.333333  6.666667 11.666667 18.333333  8.333333  6.666667  #>         8         9  #>  6.666667  3.333333 barplot(finalDigits, xlab = \"Final digit\", ylab = \"Frequency\")  abline(h = sum(finalDigits) / 10, lty = 3) chisq.test(finalDigits) #>  #>  Chi-squared test for given probabilities #>  #> data:  finalDigits #> X-squared = 13.667, df = 9, p-value = 0.1347 set.seed(3) finalDigits <- sample(x = 0:9, size = 1000, replace = TRUE) table(finalDigits) #> finalDigits #>   0   1   2   3   4   5   6   7   8   9  #> 102 104  96  88 103 115  91  86 105 110 prop.table(fullTable(finalDigits)) * 100 #>    0    1    2    3    4    5    6    7    8    9  #> 10.2 10.4  9.6  8.8 10.3 11.5  9.1  8.6 10.5 11.0 barplot(fullTable(finalDigits), xlab = \"Final digit\", ylab = \"Frequency\")  abline(h = sum(fullTable(finalDigits)) / 10, lty = 3) chisq.test(fullTable(finalDigits)) #>  #>  Chi-squared test for given probabilities #>  #> data:  fullTable(finalDigits) #> X-squared = 8.16, df = 9, p-value = 0.5181"},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"avoiding-false-positives-using-the-digit-preference-score","dir":"Articles","previous_headings":"","what":"Avoiding 
false positives using the digit preference score","title":"Digit preference","text":"problem false-positives can addressed using summary measure takes effect sample size account. widely used method digit preference score (DPS). DPS developed MONICA project: http://www.thl.fi/publications/monica/bp/bpqa.htm DPS corrects Chi-squared statistic (\\(\\chi ^ 2\\)) sample size (n) degrees freedom (df) test: \\[ DPS ~ = ~ 100 ~ \\times ~ \\sqrt{\\frac{\\chi ^ 2}{n ~ \\times ~ df}} \\] effect “desensitising” Chi-squared test. DPS can used anthropometric data types surveys may also applied clinical data. low DPS value indicates little digit preference. high DPS value indicates considerable digit preference. Guideline values DPS shown table . Guideline thresholds DPS NiPN data quality toolkit provides R language function digitPreference() calculating DPS. Applying function example data: yields: consistent little digit preference example data. output digitPreference() function can saved later use: saved output contains DPS value frequency tables final digits (counts percentages). can accessed using: saved results may also plotted: resulting plot shown .  now practice using digitPreference() function survey data. start retrieving survey data: file dp.ex01.csv comma-separated-value (CSV) file containing anthropometric data single state DHS survey West African country. first records dataset can seen using: returns: two variables interest wt (weight) ht (height). can examine digit preference variable weight (wt) using: returns: can plot digit preference using: resulting plot shown .  weight data shows digit preference classified “Good” using classifications shown table . can examine digit preference variable height (ht) using:  DPS value (22.77) DPS plot () show considerable digit preference height (ht) variable. classified “Problematic” using classifications shown table . Note specified digits = 1 used digitPreference() function weight height data example DHS data. 
variables measured recorded one decimal place. using digitPreference() function MUAC data measured recorded whole numbers (.e. decimal places) specify digits = 0. example: file dp.ex02.csv comma-separated-value (CSV) file containing anthropometric data SMART survey Kabul, Afghanistan. first records dataset can seen using: returns: variable interest muac (MUAC). variable measured recorded whole millimetres. can examine digit preference MUAC variable using:  DPS value (13.08) DPS plot () show considerable digit preference classified “Acceptable” using classifications shown table .","code":"digitPreference(finalDigits, digits = 0) #>  #>  Digit Preference Score #>  #> data:    finalDigits #> Digit Preference Score (DPS) = 3.01 (Excellent) dpsResults <- digitPreference(finalDigits, digits = 0) dpsResults$dps  #> [1] 3.01 dpsResults$tab  #> finalDigits #>   0   1   2   3   4   5   6   7   8   9  #> 102 104  96  88 103 115  91  86 105 110 dpsResults$pct  #> finalDigits #>    0    1    2    3    4    5    6    7    8    9  #> 10.2 10.4  9.6  8.8 10.3 11.5  9.1  8.6 10.5 11.0 dpsResults$dpsClass #> SMART DPS Class  #>     \"Excellent\" plot(dpsResults, main = \"finalDigit example data\") svy <- read.table(\"dp.ex01.csv\", header = TRUE, sep = \",\") head(svy) #>   psu age sex   wt   ht oedema #> 1 330  14   1  5.0 65.6      2 #> 2 330  54   2 12.1 99.0      2 #> 3 330  25   1  8.9 59.5      2 #> 4 330  52   1 14.6 98.0      2 #> 5 330  43   1 10.1 99.1      2 #> 6 330   7   1  4.0 58.1      2 digitPreference(svy$wt, digits = 1) #>  #>  Digit Preference Score #>  #> data:    svy$wt #> Digit Preference Score (DPS) = 11.86 (Good) plot(digitPreference(svy$wt, digits = 1), main = \"Weight\") digitPreference(svy$ht, digits = 1)  #>  #>  Digit Preference Score #>  #> data:    svy$ht #> Digit Preference Score (DPS) = 22.77 (Problematic) plot(digitPreference(svy$ht, digits = 1), main = \"Height\") svy <- read.table(\"dp.ex02.csv\", header = TRUE, sep = \",\") head(svy) #>   psu age 
sex weight height muac oedema #> 1   1   6   1    7.3   65.0  146      2 #> 2   1  42   2   12.5   89.5  156      2 #> 3   1  23   1   10.6   78.1  149      2 #> 4   1  18   1   12.8   81.5  160      2 #> 5   1  52   1   12.1   87.3  152      2 #> 6   1  36   2   16.9   93.0  190      2 digitPreference(svy$muac, digits = 0)  #>  #>  Digit Preference Score #>  #> data:    svy$muac #> Digit Preference Score (DPS) = 13.08 (Acceptable) plot(digitPreference(svy$muac, digits = 0), main = \"MUAC\")"},{"path":"https://nutriverse.io/nipnTK/articles/dp.html","id":"some-warnings","dir":"Articles","previous_headings":"","what":"Some warnings","title":"Digit preference","text":"material presented assumed data recorded fixed precision (e.g. one decimal place weight height, decimal places MUAC). may case data recorded mixed precision. example, weights younger children may measured using “baby scales” recorded nearest 10 g (.e. two decimal places) weights older children measured using “hanging scales” recorded nearest 100 g (.e. one decimal place). sorts situations can difficult handle automatically since (e.g.) 3.1 3.10 number stored way. easiest approach treat data two separate datasets examining digit preference. Care taken ensure mistake limitations measuring instrument digit preference. example, designs MUAC tape can return measurements even number final digit. case never see MUAC measurements 1, 3, 5, 7, 9 final digit. limitation instrument look like digit preference. digitPreference() function can handle situation. retrieve dataset: file dp.ex03.csv comma-separated-value (CSV) file containing anthropometric data sample children living refugee camp West African country. MUAC measured using “numbers boxes” design MUAC tape:  can even numbers final digit type MUAC tape used. check : returns: even numbers. odd number recording error data-entry error. 
can examine digit preference data using digitPreference() function: returns: misleading digitPreference() function assumes possible final digits (.e. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9) present. case example data. can examine using: returns: can use values parameter digitPreference() specify values allowed final digit: returns: DPS moved 33.34 (“Problematic”) 0.78 (“Excellent”). can tabulate plot frequency final digits muac variable:","code":"svy <- read.table(\"dp.ex03.csv\", header = TRUE, sep = \",\")  head(svy) #>   age sex weight height muac oedema #> 1  36   2   12.4   86.9  150      2 #> 2  39   2   10.9   83.5  146      2 #> 3  29   1   11.6   90.6  138      2 #> 4  47   2   14.6   95.5  170      2 #> 5  16   1   10.4   78.9  154      2 #> 6  23   1    8.9   80.8  146      2 table(svy$muac) #>  #> 108 114 118 120 122 124 126 128 130 132 134 136 138 140 142 144 146 148 150 152  #>   1   1   3   3   2   6   5   5  21   8  16  23  20  16  32  26  24  22  16  25  #> 154 156 158 160 162 164 166 168 170 174 176 178  #>  16  14  19   8   7   7   9   3  11   2   2   1 digitPreference(svy$muac, digits = 0) #>  #>  Digit Preference Score #>  #> data:    svy$muac #> Digit Preference Score (DPS) = 33.34 (Problematic) digitPreference(svy$muac, digits = 0)$tab #> svy$muac #>  0  1  2  3  4  5  6  7  8  9  #> 75  0 74  0 74  0 77  0 74  0 digitPreference(svy$muac, digits = 0, values = c(0, 2, 4, 6, 8)) #>  #>  Digit Preference Score #>  #> data:    svy$muac #> Digit Preference Score (DPS) = 0.78 (Excellent) dpsResults <- digitPreference (svy$muac, digits = 0, values = c(0, 2, 4, 6, 8))  dpsResults$tab #> svy$muac #>  0  2  4  6  8  #> 75 74 74 77 74 dpsResults$pct #> svy$muac #>    0    2    4    6    8  #> 20.1 19.8 19.8 20.6 19.8 plot(dpsResults)"},{"path":"https://nutriverse.io/nipnTK/articles/flagging.html","id":"applying-who-flagging-criteria-to-survey-data","dir":"Articles","previous_headings":"","what":"Applying WHO flagging criteria to survey data","title":"Identifying 
outliers using flags","text":"first exercise, apply flagging criteria survey data. retrieve survey dataset: file flag.ex01.csv comma-separated-value (CSV) file containing anthropometric data recent SMART survey Sudan. Applying flagging criteria straightforward. first create column contain flag code set zero (.e. flags) records: apply flagging criteria index. apply flagging criteria HAZ index: can translated “HAZ missing HAZ -6 HAZ +6 add 1 flag variable else leave flag variable unchanged”. careful using \\(<\\) comparison operator negative numbers. Always insert space \\(<\\) \\(–\\) characters. R interprets \\(<-\\) assignment operator may produce unexpected unwanted results without issuing warning error message. apply flagging criteria WHZ index: apply flagging criteria WAZ index: Note time apply flagging criteria increase value flagging variable next power two problem detected: another index use \\(2 ^ 3\\) (.e. 8) flag problem index. advantage using coding scheme compactly codes possible combinations problems single variable (see table ). number flagged records example dataset. : returns: table shows relative frequency detected problems. See table find meaning codes.   Flagging codes based powers two meanings   number flagged records can found using: returns: proportion records flagged can found using: returns: 4.45% records flagged. Note missing values flagged. can useful check missing values see missing component measurements component measurement range calculation index values (e.g. WAZ calculated children aged ten years younger). issue can explored selection listing. example: returns: one missing value whz record 8.due missing value height (shown NA). haz also missing. may possible fix issue missing data available paper forms. Flagging dual role: data-checking tool. access data collection forms often able check records fix data-entry errors data. measure data-quality. Flagged records can indicate problems measurement, recording, data-entry, data-checking. 
proportion flagged records dataset , ideally, 2.5%. SMART guidelines consider proportions 7.5% problematic. found 4.45% records example dataset flagged. data acceptable quality. can use: display flagged records. : produces compact list. example dataset records identified using combination psu child variables. listed records can checked edited (see previous table). Anthropometric indices can recalculated flagging process repeated records can fixed fixed. Records fixed can censored analysis. Records usually censored index--index basis. example, analysis based WHZ censor records flag variable 2, 3, 6, 7. Table shows censoring rules index:   Censoring rules index   careful applying censoring rules. analysis prevalence using WHZ, example, usually include children oedema commonly used case-definition acute malnutrition : \\[ \\text{WHZ} < -2 ~ \\text{bilateral pitting oedema} \\] want use case-definitions include oedema careful exclude children oedema censoring flagged records. analysis using WAZ might want exclude oedema cases.","code":"svy <- read.table(\"flag.ex01.csv\", header = TRUE, sep = \",\") #>   psu child age sex weight height muac oedema   haz   waz   whz #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82 svy$flag <- 0 #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    0 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124   
   2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6), svy$flag + 1, svy$flag) #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    0 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < - 5 | svy$whz > 5), svy$flag + 2, svy$flag) #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < - 6 | svy$waz > 5), svy$flag + 4, svy$flag) #>   psu child age sex weight height muac oedema   haz   waz   whz flag #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93    0 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    0 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57    0 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61    0 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82    0 We started with zero  Then we added $2 ^ 0$ (i.e. 1) if HAZ was out of range.   Then we added $2 ^ 1$ (i.e. 
2) if WHZ was out of range.   Then we added $2 ^ 2$ (i.e. 4) if WAZ was out of range. table(svy$flag) #>  #>   0   1   2   3   5   6  #> 751   9  12   9   2   3 table(svy$flag != 0)[\"TRUE\"] #> TRUE  #>   35 prop.table(table(svy$flag != 0))[\"TRUE\"] #>       TRUE  #> 0.04452926 svy[is.na(svy$whz), c(\"weight\", \"height\", \"whz\")] #>   weight height whz #> 8    8.1     NA  NA svy[svy$flag != 0, ] #>     psu child age sex weight height muac oedema   haz   waz   whz flag #> 1     1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 29    1    29  24   2   16.3  107.3  155      2  6.69  2.69 -0.82    1 #> 32    2     1  12   1    6.1   99.4  112      2  9.95 -4.02 -9.18    3 #> 35    2     4  24   2    6.8   65.5  128      2 -6.27 -4.30 -0.63    1 #> 88    3    30  24   2   16.9  107.5  158      2  6.75  2.95 -0.47    1 #> 106   4    18  36   1   13.4   65.7  152      2 -8.20 -0.56  7.64    3 #> 174   7     3  36   2    6.8   66.6  134      2 -7.47 -5.35 -1.01    1 #> 198   8     1  27   2    5.5   66.0  112      2 -6.59 -5.92 -3.27    1 #> 280  11     7  24   1    6.7   81.7  140      2 -1.77 -4.86 -5.63    2 #> 286  11    13  48   1    9.4   77.3  146      2 -6.21 -4.25 -0.69    1 #> 292  11    19  12   1   12.9   92.3  152      2  6.97  2.68 -0.50    1 #> 307  12     3  36   1    7.5   90.0  130      2 -1.64 -4.99 -6.42    2 #> 350  14     1  20   1    5.7   77.8  142      2 -2.27 -5.49 -6.47    2 #> 352  14     3  48   1    6.5   80.7  140      2 -5.40 -6.22 -5.74    6 #> 368  14    19  48   1   13.4   66.3  144      2 -8.83 -1.58  7.33    3 #> 399  15    21  36   1   14.3   66.0  154      2 -8.12 -0.02  8.58    3 #> 400  15    22  48   1   14.5   68.0  152      2 -8.42 -0.95  7.80    3 #> 405  16     4  24   2    7.8   65.0  145      2 -6.42 -3.27  1.04    1 #> 406  16     5  12   1    7.8   98.0  138      2  9.36 -1.93 -7.23    3 #> 408  16     7  48   1    8.0   77.0  128      2 -6.28 -5.20 -2.66    1 #> 432  17     3   6   1    7.9   98.4  
138      2 14.38 -0.04 -7.18    3 #> 433  17     4  48   2    8.3   94.9  136      2 -1.82 -4.79 -5.63    2 #> 490  19     1  12   2    5.3   72.0  152      2 -0.78 -4.27 -5.30    2 #> 591  22    24  36   1   14.0   69.0  152      2 -7.31 -0.20  6.77    3 #> 594  23     1  36   1    5.4   80.0  140      2 -4.34 -6.66 -7.27    6 #> 595  23     2  36   1    5.9   72.0  114      2 -6.50 -6.26 -4.96    5 #> 596  23     3  24   1    6.3   77.0  130      2 -3.31 -5.24 -5.38    2 #> 599  23     6  36   1    6.5   80.0  130      2 -4.34 -5.79 -5.61    2 #> 616  23    23  36   1   16.0   74.0  144      2 -5.96  0.90  6.82    2 #> 640  25     1  12   2    6.3   99.3  110      2  9.82 -2.96 -8.25    3 #> 641  25     2  48   2    6.7   85.0  140      2 -4.12 -5.90 -5.83    2 #> 671  26     1  48   1    5.3   95.0  135      2 -1.99 -7.03 -9.71    6 #> 690  26    20  36   1   16.0   79.0  162      2 -4.61  0.90  5.34    2 #> 715  28     4  36   2    7.7  103.0  114      2  2.09 -4.60 -7.31    2 #> 757  30     1  24   1    5.5   68.6  106      2 -6.06 -6.01 -4.76    5 svy[svy$flag != 0, c(\"psu\", \"child\", \"flag\")] #>     psu child flag #> 1     1     1    2 #> 29    1    29    1 #> 32    2     1    3 #> 35    2     4    1 #> 88    3    30    1 #> 106   4    18    3 #> 174   7     3    1 #> 198   8     1    1 #> 280  11     7    2 #> 286  11    13    1 #> 292  11    19    1 #> 307  12     3    2 #> 350  14     1    2 #> 352  14     3    6 #> 368  14    19    3 #> 399  15    21    3 #> 400  15    22    3 #> 405  16     4    1 #> 406  16     5    3 #> 408  16     7    1 #> 432  17     3    3 #> 433  17     4    2 #> 490  19     1    2 #> 591  22    24    3 #> 594  23     1    6 #> 595  23     2    5 #> 596  23     3    2 #> 599  23     6    2 #> 616  23    23    2 #> 640  25     1    3 #> 641  25     2    2 #> 671  26     1    6 #> 690  26    20    2 #> 715  28     4    2 #> 757  30     1    
5"},{"path":"https://nutriverse.io/nipnTK/articles/flagging.html","id":"applying-smart-flagging-criteria-to-survey-data","dir":"Articles","previous_headings":"","what":"Applying SMART flagging criteria to survey data","title":"Identifying outliers using flags","text":"next exercise apply SMART flagging criteria survey dataset. retrieve survey dataset: create column contain flag code set zero (.e. flags) records: Applying SMART flagging criteria requires us first calculate mean index value: use mean value define flagging ranges: index: number flagged records example dataset. : returns: table shows relative frequency detected problems. See previous table find meaning codes. number flagged records can found using: returns: proportion records flagged can found using: returns: 16% records flagged. high proportion records flagged. Note SMART flagging criteria identify considerably records (126 records flagged) flagging criteria (35 records flagged). example SMART flagging criteria flagged 91 biologically plausible records. can list flagged records using: listed records can checked edited (see previous table). Anthropometric indices can recalculated flagging process repeated records can fixed fixed. listing records displaying large tables may see message like : max.print option sets limit length information can displayed single command. 
can alter behaviour using:","code":"svy <- read.table(\"flag.ex01.csv\", header = TRUE, sep = \",\") #>   psu child age sex weight height muac oedema   haz   waz   whz #> 1   1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03 #> 2   1     2  13   2    6.4   70.4  116      2 -1.83 -3.04 -2.93 #> 3   1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25 #> 4   1     4  15   1    7.2   75.4  130      2 -1.48 -3.22 -3.57 #> 5   1     5  15   1    7.4   70.0  124      2 -3.61 -2.99 -1.61 #> 6   1     6  18   2    7.7   70.6  130      2 -3.48 -2.40 -0.82 svy$flag <- 0 meanHAZ <- mean(svy$haz, na.rm = TRUE) svy$flag <- ifelse(!is.na(svy$haz) &                (svy$haz < (meanHAZ - 3) | svy$haz > (meanHAZ + 3)),                svy$flag + 1, svy$flag) meanWHZ <- mean(svy$whz, na.rm = TRUE)  svy$flag <- ifelse(!is.na(svy$whz) &               (svy$whz < (meanWHZ - 3) | svy$whz > (meanWHZ + 3)),               svy$flag + 2, svy$flag)   meanWAZ <- mean(svy$waz, na.rm = TRUE)  svy$flag <- ifelse(!is.na(svy$waz) &               (svy$waz < (meanWAZ - 3) | svy$waz > (meanWAZ + 3)),               svy$flag + 4, svy$flag) table(svy$flag) #>  #>   0   1   2   3   4   5   6   7  #> 660  59  11  16   1  19  16   4 table(svy$flag != 0)[\"TRUE\"] #> TRUE  #>  126 prop.table(table(svy$flag != 0))[\"TRUE\"] #>      TRUE  #> 0.1603053 svy[svy$flag != 0, ] #>     psu child age sex weight height muac oedema   haz   waz   whz flag #> 1     1     1  20   2    6.1   82.5  127      2 -0.07 -4.54 -6.03    2 #> 3     1     3  15   1    7.1   67.5  124      2 -4.60 -3.34 -1.25    1 #> 15    1    15  36   1   12.3   79.7  144      2 -4.42 -1.27  1.97    3 #> 28    1    28  48   2   15.8  109.7  146      2  1.62 -0.12 -1.72    1 #> 29    1    29  24   2   16.3  107.3  155      2  6.69  2.69 -0.82    5 #> 31    1    31  48   2   18.8  109.9  166      2  1.66  1.10  0.13    1 #> 32    2     1  12   1    6.1   99.4  112      2  9.95 -4.02 -9.18    3 #> 34    2     3  24   2    6.5   76.0  108  
    2 -3.01 -4.61 -4.16    6 #> 35    2     4  24   2    6.8   65.5  128      2 -6.27 -4.30 -0.63    1 #> 36    2     5  36   1    7.3   76.0  110      2 -5.42 -5.15 -3.56    5 #> 42    2    11  12   2    9.9   80.0  150      2  2.32  0.82 -0.21    1 #> 44    2    13  36   2   10.5   78.0  142      2 -4.48 -2.24  0.87    1 #> 52    2    21  36   1   12.7   77.5  144      2 -5.01 -1.01  2.77    3 #> 57    2    26  24   1   15.5   93.7  166      2  2.16  2.13  1.46    5 #> 59    3     1  18   2    5.7   67.0  110      2 -4.72 -4.72 -3.21    5 #> 66    3     8  48   2    9.4   79.0  144      2 -5.51 -4.03 -0.57    1 #> 76    3    18  24   2   12.1   96.0  138      2  3.19  0.42 -1.79    1 #> 88    3    30  24   2   16.9  107.5  158      2  6.75  2.95 -0.47    5 #> 89    4     1  26   2    6.6   71.7  114      2 -4.73 -4.74 -2.95    5 #> 106   4    18  36   1   13.4   65.7  152      2 -8.20 -0.56  7.64    3 #> 107   4    19  24   1   13.7   97.6  150      2  3.43  1.05 -0.89    1 #> 122   5     4  24   1    8.0   73.3  130      2 -4.52 -3.61 -1.66    1 #> 125   5     7  36   2   11.3  106.2  150      2  2.93 -1.63 -4.61    3 #> 139   5    21  24   2   15.2   82.0  138      2 -1.15  2.18  3.97    6 #> 154   6    14  24   2   11.9   91.0  148      2  1.64  0.29 -0.91    1 #> 165   6    25  36   1   14.9  108.0  144      2  3.21  0.31 -2.13    1 #> 173   7     2  10   2    6.5   76.2  122      2  1.91 -2.23 -4.20    3 #> 174   7     3  36   2    6.8   66.6  134      2 -7.47 -5.35 -1.01    5 #> 187   7    16  10   2   11.6   84.3  152      2  5.19  2.50  0.54    5 #> 198   8     1  27   2    5.5   66.0  112      2 -6.59 -5.92 -3.27    5 #> 199   8     2  24   2    6.4   75.0  138      2 -3.32 -4.72 -4.10    6 #> 201   8     4  24   1    7.1   70.5  122      2 -5.44 -4.47 -2.31    1 #> 203   8     6  31   1    8.5   72.9  134      2 -5.71 -3.83 -0.79    1 #> 205   8     8  36   2    9.4   78.0  146      2 -4.48 -3.18 -0.35    1 #> 212   8    15  48   1   11.4  102.5  126    
  2 -0.20 -2.88 -4.22    2 #> 254   9    30  42   1   17.9  109.4  164      2  2.41  1.23 -0.26    1 #> 255  10     1  23   1    6.7   71.0  118      2 -5.32 -4.76 -3.23    5 #> 274  11     1  24   2    5.8   71.9  108      2 -4.28 -5.34 -4.40    6 #> 280  11     7  24   1    6.7   81.7  140      2 -1.77 -4.86 -5.63    6 #> 283  11    10  36   1    8.5   78.3  126      2 -4.80 -4.20 -2.19    1 #> 286  11    13  48   1    9.4   77.3  146      2 -6.21 -4.25 -0.69    1 #> 290  11    17  24   2   12.4   99.9  136      2  4.40  0.62 -2.33    1 #> 292  11    19  12   1   12.9   92.3  152      2  6.97  2.68 -0.50    5 #> 301  11    28  24   2   15.1   85.3  140      2 -0.13  2.13  2.94    6 #> 302  11    29  30   1   15.2   82.9  154      2 -2.65  1.13  3.76    2 #> 303  11    30  48   2   15.8   90.5  132      2 -2.84 -0.12  2.29    2 #> 307  12     3  36   1    7.5   90.0  130      2 -1.64 -4.99 -6.42    6 #> 313  12     9  12   1   10.0   81.0  150      2  2.21  0.33 -0.75    1 #> 315  12    11  48   1   10.6   84.0  142      2 -4.61 -3.43 -0.75    1 #> 330  13     3  24   1    7.7   73.0  114      2 -4.62 -3.90 -2.06    1 #> 340  13    13  12   2   11.1   79.0  152      2  1.94  1.72  1.26    5 #> 345  13    18  24   1   13.3   96.1  142      2  2.94  0.79 -0.93    1 #> 350  14     1  20   1    5.7   77.8  142      2 -2.27 -5.49 -6.47    6 #> 352  14     3  48   1    6.5   80.7  140      2 -5.40 -6.22 -5.74    7 #> 366  14    17  24   1   12.7   92.3  185      2  1.70  0.39 -0.70    1 #> 368  14    19  48   1   13.4   66.3  144      2 -8.83 -1.58  7.33    3 #> 379  15     1  12   1    5.1   66.0  106      2 -4.10 -5.24 -4.75    6 #> 395  15    17  24   1   13.1   80.0  144      2 -2.33  0.66  2.62    2 #> 399  15    21  36   1   14.3   66.0  154      2 -8.12 -0.02  8.58    3 #> 400  15    22  48   1   14.5   68.0  152      2 -8.42 -0.95  7.80    3 #> 403  16     2  24   1    7.0   74.0  130      2 -4.29 -4.57 -3.56    4 #> 405  16     4  24   2    7.8   65.0  145      
2 -6.42 -3.27  1.04    1 #> 406  16     5  12   1    7.8   98.0  138      2  9.36 -1.93 -7.23    3 #> 408  16     7  48   1    8.0   77.0  128      2 -6.28 -5.20 -2.66    5 #> 432  17     3   6   1    7.9   98.4  138      2 14.38 -0.04 -7.18    3 #> 433  17     4  48   2    8.3   94.9  136      2 -1.82 -4.79 -5.63    6 #> 435  17     6   9   1    8.8   77.7  136      2  2.55 -0.11 -1.61    1 #> 448  17    19  36   1   13.9  105.0  138      2  2.41 -0.26 -2.34    1 #> 449  17    20  36   2   14.4  107.5  162      2  3.27  0.30 -2.27    1 #> 460  17    31  48   1   18.5   96.2  170      2 -1.70  0.96  3.00    2 #> 462  18     2   7   1    7.6   76.5  146      2  3.38 -0.80 -3.19    1 #> 464  18     4  23   1    8.0   73.4  134      2 -4.52 -3.49 -1.69    1 #> 468  18     8  36   1    9.3   77.6  140      2 -4.99 -3.57 -0.89    1 #> 483  18    23  24   1   15.8  102.5  146      2  5.04  2.29 -0.21    5 #> 489  18    29  48   2   19.2  109.9  164      2  1.66  1.24  0.35    1 #> 490  19     1  12   2    5.3   72.0  152      2 -0.78 -4.27 -5.30    2 #> 499  19    10  48   1   10.0   84.2  140      2 -4.56 -3.84 -1.53    1 #> 508  19    19  24   1   13.7   98.0  180      2  3.56  1.05 -0.97    1 #> 510  19    21  24   1   13.9   92.7  152      2  1.83  1.18  0.35    1 #> 512  19    23  36   2   15.8  101.5  174      2  1.69  1.00  0.09    1 #> 519  20     7  18   1    9.4   69.5  140      2 -4.73 -1.36  1.47    1 #> 528  20    16  24   2   12.5   91.5  146      2  1.79  0.68 -0.46    1 #> 530  20    18  24   2   13.2   91.2  160      2  1.70  1.11  0.22    1 #> 536  20    24  48   2   17.5  109.9  154      2  1.66  0.61 -0.63    1 #> 537  20    25  36   1   18.1  109.3  162      2  3.57  1.90 -0.11    5 #> 557  21    19  24   2   11.4   92.0  138      2  1.95 -0.05 -1.64    1 #> 587  22    20  36   2   12.7   80.4  154      2 -3.85 -0.68  2.38    2 #> 591  22    24  36   1   14.0   69.0  152      2 -7.31 -0.20  6.77    3 #> 594  23     1  36   1    5.4   80.0  140      2 
-4.34 -6.66 -7.27    6 #> 595  23     2  36   1    5.9   72.0  114      2 -6.50 -6.26 -4.96    7 #> 596  23     3  24   1    6.3   77.0  130      2 -3.31 -5.24 -5.38    6 #> 598  23     5  24   2    6.5   71.0  124      2 -4.56 -4.61 -2.93    5 #> 599  23     6  36   1    6.5   80.0  130      2 -4.34 -5.79 -5.61    6 #> 600  23     7  24   2    7.0   70.0  112      2 -4.87 -4.10 -1.75    1 #> 604  23    11  14   1    8.0   66.0  136      2 -4.86 -2.11  0.77    1 #> 607  23    14  36   1    8.3   74.0  138      2 -5.96 -4.36 -1.40    1 #> 612  23    19  48   1   11.5   80.0  144      2 -5.56 -2.81  1.14    1 #> 616  23    23  36   1   16.0   74.0  144      2 -5.96  0.90  6.82    3 #> 621  24     5  24   1    8.4   72.2  140      2 -4.88 -3.22 -0.73    1 #> 633  24    17  24   2   12.9   93.2  152      2  2.32  0.93 -0.46    1 #> 640  25     1  12   2    6.3   99.3  110      2  9.82 -2.96 -8.25    3 #> 641  25     2  48   2    6.7   85.0  140      2 -4.12 -5.90 -5.83    6 #> 649  25    10  36   2    8.6   78.0  134      2 -4.48 -3.85 -1.38    1 #> 661  25    22  24   2   12.4   91.0  140      2  1.64  0.62 -0.44    1 #> 671  26     1  48   1    5.3   95.0  135      2 -1.99 -7.03 -9.71    6 #> 672  26     2  18   2    5.6   67.0  108      2 -4.72 -4.84 -3.41    5 #> 674  26     4  36   1    8.0   76.0  134      2 -5.42 -4.60 -2.40    5 #> 679  26     9  48   1   10.5   82.0  142      2 -5.09 -3.50 -0.38    1 #> 683  26    13  24   1   13.8   75.0  156      2 -3.97  1.11  4.39    2 #> 685  26    15  24   1   14.3   85.0  168      2 -0.69  1.42  2.40    2 #> 689  26    19  36   1   15.8  104.0  148      2  2.14  0.80 -0.54    1 #> 690  26    20  36   1   16.0   79.0  162      2 -4.61  0.90  5.34    3 #> 692  27     2  24   2    7.1   68.2  124      2 -5.43 -4.00 -1.04    1 #> 698  27     8  36   2    8.4   75.4  124      2 -5.16 -4.01 -1.06    1 #> 715  28     4  36   2    7.7  103.0  114      2  2.09 -4.60 -7.31    7 #> 721  28    10  48   1   10.3   82.0  148      2 
-5.09 -3.64 -0.61    1 #> 723  28    12  15   1   11.0   73.0  162      2 -2.43  0.59  2.24    2 #> 733  29     1  16   1    5.9   69.2  112      2 -4.26 -4.85 -4.17    6 #> 734  29     2  17   1    6.1   69.3  114      2 -4.53 -4.75 -3.81    5 #> 745  29    13  24   1   11.0   70.3  114      2 -5.50 -0.87  3.01    3 #> 757  30     1  24   1    5.5   68.6  106      2 -6.06 -6.01 -4.76    7 #> 767  30    11  36   2   10.2   77.5  142      2 -4.61 -2.49  0.66    1 #> 781  30    25  24   2   13.3   91.5  152      2  1.79  1.16  0.24    1 #> 783  30    27  36   1   14.2  102.3  138      2  1.68 -0.08 -1.48    1 #> 784  30    28  36   1   14.6  106.1  154      2  2.70  0.15 -1.97    1 #> 786  30    30  36   2   15.5  101.2  154      2  1.61  0.86 -0.05    1 #> [1] \"[ reached getOption(\\\"max.print\\\") -- omitted 43 rows ]\" options(max.print = 99999)"},{"path":"https://nutriverse.io/nipnTK/articles/flagging.html","id":"flagging-data-from-older-children","dir":"Articles","previous_headings":"","what":"Flagging data from older children","title":"Identifying outliers using flags","text":"process flagging anthropometric indices older children similar used younger children. retrieve survey dataset: file flag.ex02.csv comma-separated-value (CSV) file containing anthropometric data survey children aged 11 year older attending school Ethiopia. variables interest height--age z-score (haz) BMI--age z-score (baz). apply flagging criteria (see previous table) variables: Note usually apply SMART flagging criteria older (.e. > 59 months) children. coding flag variable shown previous table. Flagging codes based powers two meanings : returns: table shows relative frequency detected problems. See previous table find meaning codes. number flagged records can found using: returns: proportion records flagged can found using: returns: 1.3% records flagged. acceptably low proportion records flagged. can list flagged records using: listed records can checked edited (see previous table). 
Anthropometric indices can recalculated flagging process repeated records can fixed fixed.","code":"svy <- read.table(\"flag.ex02.csv\", header = TRUE, sep = \",\") #>   school sex ageMonths weight height   haz   baz #> 1   1112   1       173   25.5  179.0  1.70 -8.19 #> 2   1113   2       145   22.7  164.0  1.79 -6.81 #> 3   1116   1       150   13.5  135.0 -2.40 -8.64 #> 4   1123   1       150   25.3  165.0  1.73 -6.92 #> 5   1404   2       163   19.0  116.5 -6.05 -2.89 #> 6   1501   2       185   27.4  136.6 -3.73 -2.85 svy$flag <- 0  svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6),                svy$flag + 1, svy$flag)  svy$flag <- ifelse(!is.na(svy$baz) & (svy$baz < -5 | svy$baz > 5),                svy$flag + 2, svy$flag) table(svy$flag) #>  #>   0   1   2  #> 960   2  11 table(svy$flag != 0)[\"TRUE\"] #> TRUE  #>   13 prop.table(table(svy$flag != 0))[\"TRUE\"] #>       TRUE  #> 0.01336074 svy[svy$flag != 0, ] #>     school sex ageMonths weight height   haz   baz flag #> 1     1112   1       173   25.5  179.0  1.70 -8.19    2 #> 2     1113   2       145   22.7  164.0  1.79 -6.81    2 #> 3     1116   1       150   13.5  135.0 -2.40 -8.64    2 #> 4     1123   1       150   25.3  165.0  1.73 -6.92    2 #> 5     1404   2       163   19.0  116.5 -6.05 -2.89    1 #> 23    1501   2       137   24.7  155.0  1.09 -5.20    2 #> 190   1507   1       173   24.0  154.0 -1.52 -6.46    2 #> 328   1511   1       138   26.9  165.5  2.82 -6.29    2 #> 969   1705   1       185   27.4  150.4 -2.62 -5.06    2 #> 970   1708   1       197   23.9  126.2 -6.19 -3.17    1 #> 971   1708   1       185   23.6  140.7 -3.86 -5.21    2 #> 972   1909   2       174   26.5  153.7 -1.04 -5.04    2 #> 973   2001   1       139   20.7  143.1 -0.49 -6.02    2"},{"path":"https://nutriverse.io/nipnTK/articles/nipnTK.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"The NiPN data quality toolkit","text":"document presents set practical 
analytical methods can applied variables datasets assess quality. index data quality describes scores quality data also presented. focus toolkit data required assess anthropometric status measurements weight, height length, MUAC, sex age. focus anthropometric status many presented methods applied variables. NiPN may commission additional toolkits examine variables types variables. Data quality assessed : Range checks value checks identify univariate outliers. Scatterplots statistical methods identify bivariate outliers. Use flags identify outliers anthropometric indices. Examining distribution statistics distribution measurements anthropometric indices. Assessing extent digit preference recorded measurements. Assessing extent age heaping recorded ages. Examining sex ratio. Examining age distributions age sex distributions. activities proposed order performed shown figure . NiPN data quality workflow material intended provide practical “hands ” introduction assessing data quality presented series computer-based exercises. Example datasets provided. Extensive use made R language environment statistical computing. free powerful data analysis system. Methods described sufficient detail allow activities performed using data analysis systems. R provides extensive language working data. material presented written using small subset R language. Many data quality activities supported R functions written specifically purpose. simplify assessment quality data related anthropometry anthropometric indices. basic R functions, purpose written functions, filenames example datasets also shown figure . purpose written functions described detail .","code":""},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"checking-quantitative-data","dir":"Articles","previous_headings":"","what":"Checking quantitative data","title":"Checking ranges and legal values","text":"use dataset rl.ex01 included nipnTK package. rl.ex01 dataset contains anthropometry data SMART survey Angola. 
can use summary() function examine range (summary statistics) quantitative variable: returns: graphical examination can also made:  “whiskers” boxplot extend 1.5 times interquartile range ends box (.e., lower upper quartiles). known inner fence. Data points outside inner fence considered mild outliers. NiPN data quality toolkit provides R language function outliersUV() uses method identify outliers: returns: can count number outliers use: returns: can express proportion: returns: may find easier use percentages: returns: muac values identified potential outliers possible muac values: outliersUV() function provides fence parameter alters threshold data point considered outlier. default fence = 1.5 defines inner fence (.e 1.5 times interquartile range lower quartile upper quartile). identify mild severe outliers. value fence = 3 defines outer fence (.e 3 times interquartile range lower quartile upper quartile). identify severe outliers : returns: something wrong values muac. intention muac variable records mid-upper-arm-circumference (MUAC) mm. impossibly small (.e. 11.1, 12.4, 13.2) impossibly large values (.e. 999.0). three impossibly small values probably due data recorded cm rather mm. probably safe change three values 111, 124 132. easiest record separately: alternative approach specify row numbers instead values: three 999.0 values missing values coded 999.0. safe set three values missing using special NA value: Range checks repeated editing data ensure problems fixed: Following boxplot muac variable made using: fixes incorrectly entered data missing values made.  
now severe outliers: returns: usually better identify edit extreme univariate outliers, done , use scatterplot statistical distance methods described elsewhere toolkit identify potential outliers.","code":"svy <- rl.ex01 head(svy) #>   age sex weight height muac oedema #> 1  12   2    6.7   68.5  148      2 #> 2   6   1    6.4   65.0  125      2 #> 3   6   2    6.5   65.6  125      2 #> 4   8   1    7.2   68.4  144      2 #> 5  12   M    6.1   65.4  114      2 #> 6   8   1    7.7   66.5  146      2 summary(svy$muac) #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>    11.1   128.0   139.0   140.3   148.0   999.0 boxplot(svy$muac, horizontal = TRUE, xlab = \"MUAC (mm)\", frame.plot = FALSE) svy[outliersUV(svy$muac), ] #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>     age sex weight height  muac oedema #> 33   24   1    9.8   74.5 180.0      2 #> 93   12   2    6.7   67.0  96.0      1 #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 194  24   M    7.0   75.0  95.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 253  35   2    7.6   75.6  97.0      2 #> 381  24   1   10.8   82.8  12.4      2 #> 501  36   2   15.5   93.4 185.0      2 #> 594  21   2    9.8   76.5  13.2      2 #> 714  59   2   18.9   98.5 180.0      2 #> 752  48   2   15.6  102.2 999.0      2 #> 756  59   1   19.4  101.1 180.0      2 #> 873  59   1   20.6  109.4 179.0      2 table(outliersUV(svy$muac)) #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #> FALSE  TRUE  #>   892    14 prop.table(table(outliersUV(svy$muac))) #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #>      FALSE       TRUE  #> 0.98454746 0.01545254 prop.table(table(outliersUV(svy$muac))) * 100 #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #>     FALSE      TRUE  #> 98.454746  1.545254 #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>     age sex weight height  muac oedema #> 33   24   1  
  9.8   74.5 180.0      2 #> 93   12   2    6.7   67.0  96.0      1 #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 194  24   M    7.0   75.0  95.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 253  35   2    7.6   75.6  97.0      2 #> 381  24   1   10.8   82.8  12.4      2 #> 501  36   2   15.5   93.4 185.0      2 #> 594  21   2    9.8   76.5  13.2      2 #> 714  59   2   18.9   98.5 180.0      2 #> 752  48   2   15.6  102.2 999.0      2 #> 756  59   1   19.4  101.1 180.0      2 #> 873  59   1   20.6  109.4 179.0      2 svy[outliersUV(svy$muac, fence = 3), ] #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 #>     age sex weight height  muac oedema #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 381  24   1   10.8   82.8  12.4      2 #> 594  21   2    9.8   76.5  13.2      2 #> 752  48   2   15.6  102.2 999.0      2 svy$muac[svy$muac == 11.1] <- 111 svy$muac[381] <- 124 svy$muac[594] <- 132 svy$muac[svy$muac == 999.00] <- NA summary(svy$muac) svy[outliersUV(svy$muac), ] svy[outliersUV(svy$muac, fence = 3), ] boxplot(svy$muac, horizontal = TRUE, xlab = \"MUAC (mm)\", frame.plot = FALSE) prop.table(table(outliersUV(svy$muac, fence = 3))) * 100 #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 #>  #> FALSE  #>   100"},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"editing-data","dir":"Articles","previous_headings":"","what":"Editing data","title":"Checking ranges and legal values","text":"edited records outliers R command line. good idea edit data command line using script containing required commands. script provides record changes made data. R also keeps record whatever command line “history file”. history file plain text file usually called .Rhistory stored home directory. regulatory authorities require keep history file. publications may require provide “reproducible data analysis”. 
edited annotated copy history file. edit() function provides basic tool editing data interactively. Editing data using edit() function typically three stage process: Create new object containing data requires editing. Use edit() function edit data new object closing data editor window finished. Replace old records edited records. try using separate copy example data: can check edits made using: fixed problems data return: edit() function works differently different operating systems different graphical user interfaces. using RStudio RAnalyticFlow OS X need install XQuartz want use edit() function. XQuartz available : https://www.xquartz.org/index.html","code":"x <- rl.ex01 records2update <- x[outliersUV(x$muac, fence = 3), ] records2update <- edit(records2update) x[row.names(records2update), ] <- records2update #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 x[outliersUV(x$muac, fence = 3), ] #>  #> Univariate outliers : Lower fence = 68, Upper fence = 208 #>     age sex weight height  muac oedema #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 381  24   1   10.8   82.8  12.4      2 #> 594  21   2    9.8   76.5  13.2      2 #> 752  48   2   15.6  102.2 999.0      2"},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"checking-categorical-variables","dir":"Articles","previous_headings":"","what":"Checking categorical variables","title":"Checking ranges and legal values","text":"can use table() function examine codes used categorical variables. example: returns: intention sex variable coded using 1 male 2 female small number records codes M male F female used. mixed coding scheme like complicate data-management data-analysis. Data sex variable edited ensure consistent coding used: may find records contain meaningless codes. code 3 example dataset , probably, meaning likely simple data entry error. record checked corrected, possible. 
record corrected sex variable set missing: Legal value checks repeated editing ensure problems fixed: now returns: table contains cells values M, F, 3 R imported variable categorical “factor” variable: returns: can fix redefining levels sex variable:","code":"table(svy$sex) #>  #>   1   2   3   F   M  #> 404 458   1  24  19 svy$sex[svy$sex == \"M\"] <- 1 svy$sex[svy$sex == \"F\"] <- 2 svy$sex[svy$sex == 3] <- NA table(svy$sex) #>  #>   1   2   3   F   M  #> 423 482   0   0   0 str(svy) #> 'data.frame':    906 obs. of  6 variables: #>  $ age   : int  12 6 6 8 12 8 18 9 12 12 ... #>  $ sex   : Factor w/ 5 levels \"1\",\"2\",\"3\",\"F\",..: 2 1 2 1 1 1 1 1 2 1 ... #>  $ weight: num  6.7 6.4 6.5 7.2 6.1 7.7 6.4 7.8 7.5 6.5 ... #>  $ height: num  68.5 65 65.6 68.4 65.4 66.5 66.7 65.3 69.1 70.3 ... #>  $ muac  : num  148 125 125 144 114 146 119 140 138 121 ... #>  $ oedema: int  2 2 2 2 2 2 2 2 2 2 ... levels(svy$sex) <- c(\"1\", \"2\", NA, NA, NA) table(svy$sex) #>  #>   1   2  #> 423 482"},{"path":"https://nutriverse.io/nipnTK/articles/rl.html","id":"saving-changes","dir":"Articles","previous_headings":"","what":"Saving changes","title":"Checking ranges and legal values","text":"edited data. usually want save changes. simple save dataset comma-separated-value (CSV) text file using write.table() function: R can work variety files format usually simplest work simple text files.","code":"write.table(x = svy, file = \"rl.ex01.clean.csv\", sep = \",\", quote = FALSE,              row.names = FALSE, fileEncoding = \"ASCII\")"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"identifying-outliers-by-observation","dir":"Articles","previous_headings":"","what":"Identifying outliers by observation","title":"Using scatterplots to identify outliers","text":"use dataset sp.ex01: dataset sp.ex01 contains anthropometric data SMART survey Democratic Republic Congo. look relationship height weight dataset: resulting plot shown .  
clear positive linear relationship height weight (.e. weight increases increasing height along straight line). can assess strength relationship using Pearson correlation coefficient: returns: close one, indicates perfect positive association. , however points lie outside bulk plotted points. outliers may due errors data. presence oedema can associated increased weight. particular issue severe oedema. outlier high value weight given height due oedema. can check : pch = ifelse(svy$oedema == 1, 19, 1) tells plot() function plot filled circles oedema cases open circles children without oedema. resulting plot shown .  single high weight height outlier appears due presence oedema. filled circles located main mass plotted points show children oedema may body weight within normal range height. children may wasted suffering form severe acute malnutrition (SAM) known kwashiorkor. Outliers can identified eye. identify() function can help : Clicking point cause record (row) number associated point displayed plot (shown ).  Right-clicking plot pressing “escape” key stop identify(). behaviour identify() function may different use alternative user interface R RStudio RAnalyticFlow. identify() function , default, display record (row) numbers identified points. usually needed. Alternative labels can displayed. example: displays height weight values selected points. ability display custom labels useful variable (column) dataset contains unique record identifiers. useful able store record (row) numbers identified points: points shown previous figure clicked identify : return: can examine data identified points: returns: oedema data coded 1 present 2 absent. Data can checked edited needed. Note record 6 oedema case probably left alone. 
dataset many variables (columns) may specify variables (columns) interest: returns:","code":"svy <- sp.ex01 head(svy) #>   age sex weight height muac oedema #> 1  54   1   20.5  111.5  180      2 #> 2  53   1   19.3  108.0  167      2 #> 3  51   2   19.3  106.0  163      2 #> 4  44   1   18.9  111.0  163      2 #> 5  47   1   18.8  103.0  173      2 #> 6  48   2   18.6   95.3  171      1 plot(svy$height, svy$weight) cor(svy$height, svy$weight, method = \"pearson\", use = \"complete.obs\") #> [1] 0.9204116 plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1)) plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1)) identify(svy$height, svy$weight) plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1))  identify(svy$height, svy$weight,           labels = paste(svy$height, svy$weight, sep = \";\"),           cex = 0.75) plot(svy$height, svy$weight, pch = ifelse(svy$oedema == 1, 19, 1))  stored <- identify(svy$height, svy$weight) stored #> [1] \"1\"  \"6\"  \"16\" \"62\" \"66\" svy[stored, ] #>    age sex weight height muac oedema #> 1   54   1   20.5  111.5  180      2 #> 6   48   2   18.6   95.3  171      1 #> 16  30   1   16.9   92.5  188      2 #> 62  55   1   15.1  118.0  156      2 #> 66  56   1   15.0  115.0  148      2 svy[stored, c(\"weight\", \"height\", \"oedema\")] #>    weight height oedema #> 1    20.5  111.5      2 #> 6    18.6   95.3      1 #> 16   16.9   92.5      2 #> 62   15.1  118.0      2 #> 66   15.0  115.0      2"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"identifying-outliers-using-statistical-distance","dir":"Articles","previous_headings":"","what":"Identifying outliers using statistical distance","title":"Using scatterplots to identify outliers","text":"formal method identifying outliers use measure statistical distance. common measure statistical distance applied scatterplot data Mahalanobis distance. treats bivariate probability distribution ellipsoid. 
Mahalanobis distance distance point centre mass distribution divided width ellipsoid direction point:  directions ellipsoid short axis test point must close centre mass distribution. directions ellipsoid long axis test point may distant centre mass distribution. NiPN data quality toolkit provides R language function outliersMD() uses Mahalanobis distance identify outliers dataset: returns set records identified eye: Data can checked edited needed. Note record 6 oedema case probably left alone. can use outliersMD() identify display outliers scatterplot:  outliersMD() function alpha parameter. default value alpha parameter alpha = 0.001. value used automatically unless another value specified. use alpha = 0.001 looking records values extreme expect find probability 0.001 problems data. can calculate number outliers expect see chance alpha = 0.001 using: returns: found five potential outliers. difference number expected number observed (.e. one expected vs. five observed) suggests identified outliers true outliers due data errors. Another way looking alpha parameter alters sensitivity outlierMD() function detecting outliers altering threshold distance used define outliers. can useful using outlierMD() function , , curvilinear relationships (see ). Larger values alpha tend detect potential outliers. example:  : almost cases default alpha = 0.001 appropriate. techniques outlined can used examine relationships pairs anthropometric variables (e.g. weight muac) identify outliers. 
sensible pairings variables examined.","code":"svy[outliersMD(svy$height, svy$weight), ] #>    age sex weight height muac oedema #> 1   54   1   20.5  111.5  180      2 #> 6   48   2   18.6   95.3  171      1 #> 16  30   1   16.9   92.5  188      2 #> 62  55   1   15.1  118.0  156      2 #> 66  56   1   15.0  115.0  148      2 plot(svy$height, svy$weight, pch = ifelse(outliersMD(svy$height, svy$weight), 19, 1)) round(nrow(svy) * 0.001) #> [1] 1 plot(svy$height, svy$weight,      pch = ifelse(outliersMD(svy$height, svy$weight, alpha = 0.01), 19, 1)) svy[outliersMD(svy$height,svy$weight, alpha = 0.01), ] #>     age sex weight height muac oedema #> 1    54   1   20.5  111.5  180      2 #> 2    53   1   19.3  108.0  167      2 #> 3    51   2   19.3  106.0  163      2 #> 4    44   1   18.9  111.0  163      2 #> 5    47   1   18.8  103.0  173      2 #> 6    48   2   18.6   95.3  171      1 #> 16   30   1   16.9   92.5  188      2 #> 32   43   1   16.2   92.6  166      2 #> 61   26   1   15.1   87.6  168      2 #> 62   55   1   15.1  118.0  156      2 #> 66   56   1   15.0  115.0  148      2 #> 477  38   2   10.3   94.6  160      2 #> 487  32   2   10.2   93.0  150      2 #> 722  17   2    8.6   63.3  136      2"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"anthropometric-measurements-and-age","dir":"Articles","previous_headings":"","what":"Anthropometric measurements and age","title":"Using scatterplots to identify outliers","text":"also expect anthropometric variables associated age. relationship particularly strong children. less strong adults may weak even reversed older people. can explore relationship anthropometric variable age using techniques described . example:  problems approach. Age often reported recorded considerable age heaping. Age unlikely approximately normally distributed, assumption Mahalanobis distance method. relationship anthropometric variables age usually follows “growth curve” rather straight line. 
combination age heaping, non-normality, curvilinear relationship may reduce effectiveness Mahalanobis distance method detecting outliers. may useful, cases, increase value alpha parameter. example:  Outliers can listed using value alpha: Mahalanobis distance method usually robust enough deal age data provided appropriate value alpha used.","code":"plot(svy$age, svy$height, pch = ifelse(outliersMD(svy$age, svy$height), 19, 1))  svy[outliersMD(svy$age, svy$height), ] #>    age sex weight height muac oedema #> 4   44   1   18.9    111  163      2 #> 62  55   1   15.1    118  156      2 plot(svy$age, svy$height, pch = ifelse(outliersMD(svy$age, svy$height, alpha = 0.025), 19, 1)) svy[outliersMD(svy$age, svy$height, alpha = 0.025), ] #>     age sex weight height muac oedema #> 1    54   1   20.5  111.5  180      2 #> 4    44   1   18.9  111.0  163      2 #> 7    55   1   18.6  109.3  156      2 #> 14   48   1   17.0  109.0  175      2 #> 27   56   2   16.4  110.0  149      2 #> 62   55   1   15.1  118.0  156      2 #> 66   56   1   15.0  115.0  148      2 #> 113  58   1   14.2   92.0  148      2 #> 129  23   2   14.0   95.0  161      2 #> 190  15   2   13.0   90.5  150      2 #> 212  21   2   12.8   93.0  152      2 #> 378  51   2   11.2   83.0  141      2 #> 453  49   1   10.6   83.0  139      2 #> 461  54   2   10.5   86.8  132      2 #> 551  41   2    9.8   78.0  139      2 #> 599  50   1    9.5   84.7  123      2 #> 660  49   1    9.1   79.5  129      2 #> 722  17   2    8.6   63.3  136      2 #> 809  41   2    7.9   75.7  120      2 #> 881  30   1    6.5   69.6  103      2 #> 893  18   2    5.8   63.2  106      2"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"difficult-relationships-for-the-mahalanobis-distance-method","dir":"Articles","previous_headings":"","what":"Difficult relationships for the Mahalanobis distance method","title":"Using scatterplots to identify outliers","text":"Mahalanobis distance method works well pairs variables long 
relationship two variables monotonic (.e. one variables always increases always decreases value variable increases value). usually case anthropometric data. explore use Mahalanobis distance method data monotonic using generated data:  clear relationship x y monotonic relationship (.e. always increasing decreasing). single obvious outlier. Mahalanobis distance method work well data. :  fails detect outlier. Relaxing alpha parameter: help. Relaxing alpha parameter :  results false positive results fails identify clear outlier. Although Mahalanobis distance used directly identify outliers non-monotonic relationships, can applied residuals fitted non-linear models. technique unlikely required anthropometric data covered toolkit. unlikely see non-monotonic relationships anthropometric data. likely see “growth curves” look like :  monotonic relationship. Mahalanobis distance method work well data. add clear outlier:  can detected using Mahalanobis distance method using slightly relaxed alpha value:","code":"x <- c(4, 8, 16, 17, 22, 27, 38, 40, 47, 48, 53, 55, 63, 71, 76, 85, 92, 96)  y <- c(6, 22, 34, 42, 51, 59, 64, 69, 70, 20, 70, 63, 63, 55, 46, 33, 19, 6) plot(x, y) plot(x, y, pch = ifelse(outliersMD(x, y), 19, 1)) plot(x, y, pch = ifelse(outliersMD(x, y, alpha = 0.025), 19, 1)) plot(x, y, pch = ifelse(outliersMD(x, y, alpha = 0.1), 19, 1)) set.seed(0) x <- 0:100 y <- 1 - exp(-x / 50) + rnorm(101, 0, 0.05)  plot(x, y) lines(x, 1 - exp(-x / 50), lty = 2) y[50] <- 0.3 plot(x, y) plot(x, y, pch = ifelse(outliersMD(x, y, alpha = 0.005), 19, 1))"},{"path":"https://nutriverse.io/nipnTK/articles/sp.html","id":"working-with-data-from-older-children","dir":"Articles","previous_headings":"","what":"Working with data from older children","title":"Using scatterplots to identify outliers","text":"now look using scatterplots Mahalanobis distance methods data older children. use sp.ex02 dataset: dataset sp.ex02 contains anthropometric data survey school-age (.e. 
5 15 years) children Pakistan. can summarise dataset using: returns: baz variable contains BMI--age z-score calculated ageMonths, sex, weight, height variables using growth reference. key thing notice summary large number missing values waz variable. weight--age z-score calculated children aged older 120 months. can check using: gives: appears nothing odd large number missing values waz variable. investigate missing values baz variable: returns: data required calculate BMI--age z-score present. Given extreme values waz variable likely BMI--age z-scores records calculated, found outside upper lower flagging criteria, value baz set missing. check recalculate BMI--age z-scores. can use scatterplots examine relationship ageMonths, weight, height:    relationships simple younger children: Variability weight appears increase increasing ageMonths. relationship height ageMonths may entirely linear. relationship weight height clearly non-linear. relationships monotonic still able use Mahalanobis distance method identify outliers:    may want experiment different values alpha parameter outliersMD() function described . Records containing values identified outliers can listed: records can checked, edited (required), anthropometric indices recalculated.","code":"svy <- sp.ex02 head(svy) #>   region school ageMonths sex weight height   haz   waz   baz #> 1      1      1        64   1   13.9   97.7 -3.12 -2.56 -0.56 #> 2      1      1        72   1   21.1  118.7  0.56  0.21 -0.25 #> 3      1      1        75   2   15.6  103.7 -2.47 -2.02 -0.53 #> 4      1      1        75   2   16.0  102.7 -2.66 -1.82 -0.08 #> 5      1      1        75   2   17.5  108.7 -1.51 -1.16 -0.31 #> 6      1      1        79   1   15.0  101.0 -3.57 -2.99 -0.53 summary(svy) #>      region          school        ageMonths          sex        #>  Min.   :1.000   Min.   : 1.00   Min.   : 60.0   Min.   
:1.000   #>  1st Qu.:3.000   1st Qu.: 8.00   1st Qu.: 83.0   1st Qu.:1.000   #>  Median :4.000   Median :15.00   Median : 98.0   Median :1.000   #>  Mean   :4.491   Mean   :15.51   Mean   :104.8   Mean   :1.397   #>  3rd Qu.:7.000   3rd Qu.:23.00   3rd Qu.:124.0   3rd Qu.:2.000   #>  Max.   :8.000   Max.   :30.00   Max.   :178.0   Max.   :2.000   #>                                                                  #>      weight          height           haz              waz         #>  Min.   :10.30   Min.   : 86.2   Min.   :-5.730   Min.   :-5.350   #>  1st Qu.:17.20   1st Qu.:108.7   1st Qu.:-2.640   1st Qu.:-2.380   #>  Median :21.30   Median :120.9   Median :-1.790   Median :-1.615   #>  Mean   :22.62   Mean   :121.2   Mean   :-1.705   Mean   :-1.581   #>  3rd Qu.:27.00   3rd Qu.:132.6   3rd Qu.:-0.790   3rd Qu.:-0.805   #>  Max.   :51.90   Max.   :164.2   Max.   : 3.550   Max.   : 3.010   #>                                                   NA's   :267      #>       baz          #>  Min.   :-4.7000   #>  1st Qu.:-1.2900   #>  Median :-0.7600   #>  Mean   :-0.7758   #>  3rd Qu.:-0.2100   #>  Max.   : 1.9900   #>  NA's   :8 by(svy$ageMonths, is.na(svy$waz), summary) #> is.na(svy$waz): FALSE #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  #>   60.00   76.00   88.00   88.24   99.00  120.00  #> ------------------------------------------------------------  #> is.na(svy$waz): TRUE #>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.  
#>   121.0   125.5   141.0   140.8   151.0   178.0 svy[is.na(svy$baz), ] #>     region school ageMonths sex weight height   haz   waz baz #> 83       1      3       143   2   14.0  125.9 -3.64    NA  NA #> 158      2      6        96   1   12.3  118.4 -1.57 -5.26  NA #> 275      3     10        77   1   10.3  113.9 -0.88 -5.35  NA #> 415      4     15        75   1   33.0  108.3 -1.90  3.01  NA #> 508      5     19        85   2   11.1  111.5 -1.78 -4.84  NA #> 529      6     20        78   1   12.1  111.9 -1.37 -4.45  NA #> 761      8     28        62   1   13.3  115.4  0.99 -2.70  NA #> 806      8     29       100   1   13.2  121.2 -1.36 -5.01  NA plot(svy$ageMonths, svy$weight) plot(svy$ageMonths, svy$height) plot(svy$height, svy$weight) plot(svy$ageMonths, svy$weight,      pch = ifelse(outliersMD(svy$ageMonths, svy$weight), 19, 1)) plot(svy$ageMonths, svy$height,      pch = ifelse(outliersMD(svy$ageMonths, svy$height), 19, 1)) plot(svy$height, svy$weight,      pch = ifelse(outliersMD(svy$height, svy$weight), 19, 1)) svy[outliersMD(svy$ageMonths, svy$weight), ]  #>     region school ageMonths sex weight height   haz  waz   baz #> 57       1      2       161   1   47.0  158.7 -0.05   NA  0.05 #> 83       1      3       143   2   14.0  125.9 -3.64   NA    NA #> 139      2      5       123   2   46.5  144.9  0.64   NA  1.82 #> 319      3     11       143   1   45.2  156.0  1.06   NA  0.50 #> 407      4     14       132   1   46.2  155.3  1.73   NA  0.97 #> 415      4     15        75   1   33.0  108.3 -1.90 3.01    NA #> 672      7     24       175   1   50.5  163.5 -0.42   NA -0.25 #> 727      7     26       147   1   46.1  162.7  1.67   NA -0.14 #> 731      7     26       173   1   51.9  164.2 -0.21   NA -0.03 svy[outliersMD(svy$ageMonths, svy$height), ]  #>     region school ageMonths sex weight height  haz  waz   baz #> 457      5     17       110   1   37.5    155 3.55 1.62 -0.32 svy[outliersMD(svy$weight, svy$height), ] #>     region school ageMonths sex 
weight height   haz   waz   baz #> 57       1      2       161   1   47.0  158.7 -0.05    NA  0.05 #> 83       1      3       143   2   14.0  125.9 -3.64    NA    NA #> 139      2      5       123   2   46.5  144.9  0.64    NA  1.82 #> 275      3     10        77   1   10.3  113.9 -0.88 -5.35    NA #> 319      3     11       143   1   45.2  156.0  1.06    NA  0.50 #> 322      3     11       155   2   39.0  135.5 -3.01    NA  0.84 #> 369      4     13       118   2   35.5  129.7 -1.32  0.66  1.66 #> 407      4     14       132   1   46.2  155.3  1.73    NA  0.97 #> 415      4     15        75   1   33.0  108.3 -1.90  3.01    NA #> 438      4     15       163   2   39.0  138.3 -2.92    NA  0.41 #> 611      6     22       148   1   41.9  146.5 -0.66    NA  0.75 #> 672      7     24       175   1   50.5  163.5 -0.42    NA -0.25 #> 731      7     26       173   1   51.9  164.2 -0.21    NA -0.03 #> 806      8     29       100   1   13.2  121.2 -1.36 -5.01    NA"},{"path":"https://nutriverse.io/nipnTK/articles/sr.html","id":"analysis-by-age","dir":"Articles","previous_headings":"","what":"Analysis by age","title":"Sex ratio","text":"sex ratio test may performed age group separately. can apply sex ratio test age-group using () function: Note variable ycag created holds year-centred-age-group. approach assumes sex ratio independent age. approach make assumption use numbers male female children age-ranges population taken census data. useful source census data United States Census Bureau’s International Data Base: https://www.census.gov/data-tools/demo/idb/informationGateway.php source gives following estimates Afghanistan 2016: need ensure use age-ranges census: test sex ratio age group separately: tests find significant differences observed expected sex ratios. 
noted () tests might based small sample sizes: may, therefore, able detect large differences.","code":"svy$ycag <- recode(svy$age, \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\")  by(svy$sex, svy$ycag, sexRatioTest, codes = c(1, 2), pop = c(2.658, 2.508)) #> svy$ycag: 1 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.4879 #> X-squared = 0.4845, p = 0.4864 #>  #> ------------------------------------------------------------  #> svy$ycag: 2 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.5152 #> X-squared = 0.0000, p = 1.0000 #>  #> ------------------------------------------------------------  #> svy$ycag: 3 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.5228 #> X-squared = 0.0374, p = 0.8466 #>  #> ------------------------------------------------------------  #> svy$ycag: 4 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.4875 #> X-squared = 0.3657, p = 0.5454 #>  #> ------------------------------------------------------------  #> svy$ycag: 5 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5145 #> Observed proportion male = 0.4627 #> X-squared = 0.5280, p = 0.4674 svy$ageGroup <- recode(svy$age, \"0:11=0; 12:23=1; 24:35=2; 36:47=3; 48:59=4\") sexRatioTest(svy$sex[svy$ageGroup == 0], pop = c(594602, 573956)) sexRatioTest(svy$sex[svy$ageGroup == 1], pop = c(550593, 533579)) sexRatioTest(svy$sex[svy$ageGroup == 2], pop = c(526827, 510479)) sexRatioTest(svy$sex[svy$ageGroup == 3], pop = c(509048, 493185)) sexRatioTest(svy$sex[svy$ageGroup == 4], pop = c(493521, 478137)) #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5088 #> Observed proportion male = 0.5047 #> X-squared = 0.0000, p = 1.0000 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5078 #> Observed proportion male = 0.4901 #> X-squared = 0.1885, p = 0.6642 #>  #>  Sex Ratio Test #>  #> 
Expected proportion male = 0.5079 #> Observed proportion male = 0.5374 #> X-squared = 0.6800, p = 0.4096 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5079 #> Observed proportion male = 0.5052 #> X-squared = 0.0000, p = 0.9978 #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.5079 #> Observed proportion male = 0.4552 #> X-squared = 1.4098, p = 0.2351 table(svy$ageGroup) #>  #>   0   1   2   3   4  #> 107 202 227 192 145"},{"path":"https://nutriverse.io/nipnTK/articles/sr.html","id":"sex-ratios-in-adults","dir":"Articles","previous_headings":"","what":"Sex ratios in adults","title":"Sex ratio","text":"data children usually expect something like one one male female sex ratio. usually case adults, especially older adults. retrieve survey dataset: dataset ah.ex01 comma-separated-value (CSV) file containing anthropometry data Rapid Assessment Method Older People (RAM-OP) survey Dadaab refugee camps Garissa, Kenya. survey older people, defined people aged sixty years older. type survey usually possible use camp administration data find expected male female sex ratio. information given RAM-OP survey report. camp population predominantly Somali. reported 188 thousand men 220 thousand women aged sixty years older Somalia (2010 estimates). sex ratio : : expected proportion population male : : proportion sample male: : looks much smaller expected proportion. sex ratio test: reports: proportion males sample significantly smaller expected. result due extraordinary nature population (e.g. camp population really many older women older men). also due selection bias survey. example, men likely women away home day household sample taken day systematically excluded active members male population. Note sex ratio test applies population surveys. surveys focus (e.g.) carers small children observed male female sex ratio likely strongly biased towards women. 
cases sensible apply sex ratio test.","code":"svy <- read.table(\"ah.ex01.csv\", header = TRUE, sep = \",\")  head(svy) #>   psu camp block age sex weight height demispan muac oedema #> 1   1  IFO   A01  90   1   40.8  159.3     77.2 20.0      2 #> 2   1  IFO   A01  60   2   69.8  155.3     78.3 35.3      2 #> 3   1  IFO   A01  63   2   51.7  156.8     80.5 25.5      2 #> 4   1  IFO   A01  74   2   61.1  158.9     83.5 27.0      2 #> 5   1  IFO   A01  65   2   55.1  156.9     85.5 24.5      2 #> 6   1  IFO   A01  62   2   56.7  158.1     86.3 26.1      2 188 / 220 #> [1] 0.8545455 188 / (188 + 220) #> [1] 0.4607843 prop.table(table(svy$sex)) #>  #>        1        2  #> 0.381113 0.618887 sexRatioTest(svy$sex, codes = c(1, 2), pop = c(188, 220)) #>  #>  Sex Ratio Test #>  #> Expected proportion male = 0.4608 #> Observed proportion male = 0.3811 #> X-squared = 14.8305, p = 0.0001"},{"path":"https://nutriverse.io/nipnTK/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Mark Myatt. Author. Ernest Guevarra. Author, maintainer.","code":""},{"path":"https://nutriverse.io/nipnTK/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Mark Myatt Ernest Guevarra (2023). 
nipnTK: National Information Platforms Nutrition (NiPN) Data Quality Toolkit R package version 0.1.1.9000 URL https://nutriverse.io/nipnTK/ DOI 10.5281/zenodo.4297897","code":"@Manual{,   title = {nipnTK: National Information Platforms for Nutrition (NiPN) Data Quality Toolkit},   author = {{Mark Myatt} and {Ernest Guevarra}},   year = {2023},   note = {R package version 0.1.1.9000},   url = {https://nutriverse.io/nipnTK/},   doi = {10.5281/zenodo.4297897}, }"},{"path":"https://nutriverse.io/nipnTK/index.html","id":"nipntk-national-information-platforms-for-nutrition-nipn-data-quality-toolkit-","dir":"","previous_headings":"","what":"National Information Platforms for Nutrition Anthropometric Data Toolkit","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"National Information Platforms Nutrition (NiPN) initiative European Commission provide support countries strengthen information systems nutrition improve analysis data better inform strategic decisions faced prevent malnutrition consequences. part mandate, NiPN commissioned work development toolkit assess quality various nutrition-specific nutrition-related data. companion R package toolkit practical analytical methods can applied variables datasets assess quality. focus toolkit data required assess anthropometric status measurements weight, height length, MUAC, sex age. focus anthropometric status many presented methods applied types data. NiPN may commission additional toolkits examine variables types variables.","code":""},{"path":"https://nutriverse.io/nipnTK/index.html","id":"requirements","dir":"","previous_headings":"","what":"Requirements","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"R version 3.4 higher Extensive use made R language environment statistical computing. free powerful data analysis system. R provides extensive language working data. companion package written using small subset R language. 
Many data quality activities described toolkit supported R functions included package written specifically purpose. simplify assessment quality data related anthropometry anthropometric indices.","code":""},{"path":"https://nutriverse.io/nipnTK/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"can install nipnTK CRAN: can install development version nipnTK GitHub :","code":"install.packages(\"nipnTK\") if(!require(remotes)) install.packages(\"remotes\") remotes::install_github(\"nutriverse/nipnTK\")"},{"path":"https://nutriverse.io/nipnTK/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"Data quality assessed : Range checks value checks identify univariate outliers - guide Scatterplots statistical methods identify bivariate outliers - guide Use flags identify outliers anthropometric indices - guide Examining distribution statistics distribution measurements anthropometric indices - guide Assessing extent digit preference recorded measurements - guide Assessing extent age heaping recorded ages - guide Examining sex ratio - guide Examining age distributions age sex distributions - guide activities proposed order performed shown :","code":""},{"path":"https://nutriverse.io/nipnTK/index.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"find nipnTK package useful, please cite using suggested citation provided call citation function follows:","code":"citation(\"nipnTK\") #>  #> To cite nipnTK in publications use: #>  #>   Mark Myatt and Ernest Guevarra (2023). 
nipnTK: National Information #>   Platforms for Nutrition (NiPN) Data Quality Toolkit R package version #>   0.1.1.9000 URL https://nutriverse.io/nipnTK/ DOI #>   10.5281/zenodo.4297897 #>  #> A BibTeX entry for LaTeX users is #>  #>   @Manual{, #>     title = {nipnTK: National Information Platforms for Nutrition (NiPN) Data Quality Toolkit}, #>     author = {{Mark Myatt} and {Ernest Guevarra}}, #>     year = {2023}, #>     note = {R package version 0.1.1.9000}, #>     url = {https://nutriverse.io/nipnTK/}, #>     doi = {10.5281/zenodo.4297897}, #>   }"},{"path":"https://nutriverse.io/nipnTK/index.html","id":"community-guidelines","dir":"","previous_headings":"","what":"Community guidelines","title":"National Information Platforms for Nutrition Anthropometric Data Toolkit","text":"Feedback, bug reports feature requests welcome; file issues seek support . like contribute package, please see contributing guidelines. project released Contributor Code Conduct. participating project agree abide terms.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":null,"dir":"Reference","previous_headings":"","what":"Goodness of fit to an expected (model-based) age distribution — ageChildren","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"Goodness fit expected (model-based) age distribution","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"","code":"ageChildren(   age,   u5mr = 0,   groups = \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\" )"},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"age Vector ages u5mr five years 
mortality rate deaths / 10,000 persons / day groups Age groupings specified recodes parameter bbw::recode() function; default \"6:17=1; 18:29=2; 30:41=3; 42:53=4; 54:59=5\"","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"list class \"ageChildren\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageChildren.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Goodness of fit to an expected (model-based) age distribution — ageChildren","text":"","code":"# Chi-Squared test for age of children in dp.ex02 sample dataset using an # u5mr of 1 / 10,000 / day. svy <- dp.ex02 ac <- ageChildren(svy$age, u5mr = 1) ac #>  #> \tAge Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003 #>   # Apply function to each sex separately # Males acM <- ageChildren(svy$age[svy$sex == 1], u5mr = 1) acM #>  #> \tAge Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032 #>  # Females acF <- ageChildren(svy$age[svy$sex == 2], u5mr = 1)  # Simplified call to function by sex by(svy$age, svy$sex, ageChildren, u5mr = 1) #> svy$sex: 1 #>  #> \tAge Test (Children) #>  #> X-squared = 15.8496, df = 4, p = 0.0032 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #> \tAge Test (Children) #>  #> X-squared = 6.8429, df = 4, p = 0.1444 #>"},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":null,"dir":"Reference","previous_headings":"","what":"Age-heaping analysis — ageHeaping","title":"Age-heaping analysis — ageHeaping","text":"Age heaping tendency report children's ages nearest year adults’ ages nearest multiple five ten years. Age heaping common. 
major reason data nutritional anthropometry surveys often analysed reported using broad age groups.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Age-heaping analysis — ageHeaping","text":"","code":"ageHeaping(x, divisor = 12)"},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Age-heaping analysis — ageHeaping","text":"x Vector ages divisor Divisor (usually 5, 6, 10, 12); default 12","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Age-heaping analysis — ageHeaping","text":"list class \"ageHeaping\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageHeaping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Age-heaping analysis — ageHeaping","text":"","code":"# Test for age heaping using SMART survey data in Kabul, Afghanistan (dp.ex02) # using a divisor of 12 svy <- dp.ex02 ah12 <- ageHeaping(svy$age) ah12 #>  #> \tAge-heaping Analysis #>  #> data:\tRemainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 #>   # Test for age heaping using SMART survey data in Kabul, Afthanistan (dp.ex02) # using a divisor of 6 ah6 <- ageHeaping(svy$age, divisor = 6) ah6 #>  #> \tAge-heaping Analysis #>  #> data:\tRemainder of svy$age / 6 #> X-squared = 145.0275, df = 5, p-value = 0.0000 #>"},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"Age ratio test — ageRatioTest","title":"Age ratio test — ageRatioTest","text":"Age Ratio Test age-related test survey data 
quality.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Age ratio test — ageRatioTest","text":"","code":"ageRatioTest(x, ratio = 0.85)"},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Age ratio test — ageRatioTest","text":"x Numeric vector (age) ratio Expected age ratio","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Age ratio test — ageRatioTest","text":"lit class \"ageRatioTest\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ageRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Age ratio test — ageRatioTest","text":"","code":"# Age-ratio test on survey dataset from Kabul, Afghanistan (dp.ex02) # with an age ratio of 0.85 svy <- dp.ex02 ageRatioTest(svy$age, ratio = 0.85) #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8654 #> Observed proportion aged 6 - 29 months = 0.4639 #>  #> X-squared = 0.0531, p = 0.8178 #>   # The age ratio test applied to data for each sex separately by(svy$age, svy$sex, ageRatioTest, ratio = 0.85) #> svy$sex: 1 #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8638 #> Observed proportion aged 6 - 29 months = 0.4635 #>  #> X-squared = 0.0145, p = 0.9041 #>  #> ------------------------------------------------------------  #> svy$sex: 2 #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected 
proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8670 #> Observed proportion aged 6 - 29 months = 0.4644 #>  #> X-squared = 0.0247, p = 0.8750 #>"},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for age heaping function — ah.ex01","title":"Example dataset for age heaping function — ah.ex01","text":"Anthropometric data Rapid Assessment Method Older People (RAM-OP) survey Dadaab refugee camp Garissa, Kenya. survey people aged sixty years older.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for age heaping function — ah.ex01","text":"","code":"ah.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for age heaping function — ah.ex01","text":"data frame 593 observations 10 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/ah.ex01.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example dataset for age heaping function — ah.ex01","text":"Data courtesy HelpAge International","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for age and sex distributions function — as.ex01","title":"Example dataset for age and sex distributions function — as.ex01","text":"Data taken household rosters collected part household survey Tanzania.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for age and sex distributions function — 
as.ex01","text":"","code":"as.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/as.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for age and sex distributions function — as.ex01","text":"data frame 8736 observations 2 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for age and sex distributions function — as.ex02","title":"Example dataset for age and sex distributions function — as.ex02","text":"Census data Tanzania taken Wolfram|Alpha knowledge engine.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for age and sex distributions function — as.ex02","text":"","code":"as.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for age and sex distributions function — as.ex02","text":"data frame 20 observations 4 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/as.ex02.html","id":"source","dir":"Reference","previous_headings":"","what":"Source","title":"Example dataset for age and sex distributions function — as.ex02","text":"http://www.wolframalpha.com/input/?=Tanzania+age+distribution","code":""},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot text in a coloured bounding box. — boxText","title":"Plot text in a coloured bounding box. — boxText","text":"Plot text coloured bounding box.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot text in a coloured bounding box. 
— boxText","text":"","code":"boxText(   x,   y,   labels,   cex = 0.75,   col = \"white\",   border = FALSE,   lwd = 0.5,   pad = TRUE )"},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot text in a coloured bounding box. — boxText","text":"x, y Co-ordinates text plotted labels Text plotted cex Character expansion col Background colour border Border colour lwd Border width pad Add padding (L) (R) ends bounding box","code":""},{"path":"https://nutriverse.io/nipnTK/reference/boxText.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot text in a coloured bounding box. — boxText","text":"","code":"## Use of boxtext in the ageHeaping plot function svy <- dp.ex02 ah12 <- ageHeaping(svy$age)  plot.new() boxText(x = as.numeric(names(ah12$tab)),         y = max(ah12$tab) * 0.1,         labels = paste(sprintf(fmt = \"%3.1f\", ah12$pct), \"%\", sep = \"\"),         cex = 0.5,         pad = TRUE)"},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":null,"dir":"Reference","previous_headings":"","what":"Digit preference test — digitPreference","title":"Digit preference test — digitPreference","text":"Digit preference observation final number measurement occurs greater frequency expected chance. can occur rounding, practice increasing decreasing value measurement nearest whole half unit, data made . 
digitPreference() function assesses level digit preference exists given dataset using digit preference score (DPS).","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Digit preference test — digitPreference","text":"","code":"digitPreference(x, digits = 1, values = 0:9)"},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Digit preference test — digitPreference","text":"x Numeric vector digits Number decimal places x. using digits = 1 (e.g.) allows 105 treated 105.0 values vector possible values final digit (default = 0:9)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Digit preference test — digitPreference","text":"list class \"digitPreference\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Digit preference test — digitPreference","text":"DPS definition : Kari Kuulasmaa K, Hense HW, Tolonen H (MONICA Project), Quality Assessment Data Blood Pressure MONICA Project, MONICA Project e-publications . 
9, , Geneva, May 1998 available https://www.thl.fi/publications/monica/bp/bpqa.htm","code":""},{"path":"https://nutriverse.io/nipnTK/reference/digitPreference.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Digit preference test — digitPreference","text":"","code":"# Digit preference test applied to anthropometric data from a single state # from a DHS survey in a West African country svy <- dp.ex01 digitPreference(svy$wt, digits = 1) #>  #> \tDigit Preference Score #>  #> data:\tsvy$wt #> Digit Preference Score (DPS) = 11.86 (Good) #>"},{"path":"https://nutriverse.io/nipnTK/reference/dist.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for distributions of variables and indices — dist.ex01","title":"Example dataset for distributions of variables and indices — dist.ex01","text":"Anthropometric data SMART survey Kabul, Afghanistan.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dist.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for distributions of variables and indices — dist.ex01","text":"","code":"dist.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/dist.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for distributions of variables and indices — dist.ex01","text":"data frame 873 observations 11 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for digit preference function — dp.ex01","title":"Example dataset for digit preference function — dp.ex01","text":"Anthropometric data single state Demographic Health Survey (DHS) West African country.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for digit preference 
function — dp.ex01","text":"","code":"dp.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for digit preference function — dp.ex01","text":"data frame 796 observations 6 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for digit preference function — dp.ex02","title":"Example dataset for digit preference function — dp.ex02","text":"Anthropometric data SMART survey Kabul, Afghanistan comma-separated-value (CSV) file format. survey children aged 6-59 months old.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for digit preference function — dp.ex02","text":"","code":"dp.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for digit preference function — dp.ex02","text":"data frame 873 observations 7 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex03.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for digit preference — dp.ex03","title":"Example dataset for digit preference — dp.ex03","text":"Anthropometric data sample children living refugee camp West African country.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex03.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for digit preference — dp.ex03","text":"","code":"dp.ex03"},{"path":"https://nutriverse.io/nipnTK/reference/dp.ex03.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for digit preference — dp.ex03","text":"data frame 374 observations 6 
variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for identifying outliers using flags — flag.ex01","title":"Example dataset for identifying outliers using flags — flag.ex01","text":"Anthropometric data SMART survey Sudan.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for identifying outliers using flags — flag.ex01","text":"","code":"flag.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for identifying outliers using flags — flag.ex01","text":"data frame 786 observations 11 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for identifying outliers using flags — flag.ex02","title":"Example dataset for identifying outliers using flags — flag.ex02","text":"Anthropometric data survey children 11 years older attending school Ethiopia.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for identifying outliers using flags — flag.ex02","text":"","code":"flag.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for identifying outliers using flags — flag.ex02","text":"data.frame 973 observations 7 variables.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex03.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for identifying outliers using flags — flag.ex03","title":"Example dataset for identifying outliers using flags — 
flag.ex03","text":"Anthropometric data national survey Nigeria.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex03.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for identifying outliers using flags — flag.ex03","text":"","code":"flag.ex03"},{"path":"https://nutriverse.io/nipnTK/reference/flag.ex03.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for identifying outliers using flags — flag.ex03","text":"data frame 18330 observations 10 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":null,"dir":"Reference","previous_headings":"","what":"Fill out a one-dimensional table to include a specified range of values — fullTable","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"Fill one-dimensional table include specified range values","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"","code":"fullTable(x, values = min(x, na.rm = TRUE):max(x, na.rm = TRUE))"},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"x vector tabulate values vector values included table. 
Default : min(x, na.rm = TRUE):max(x, na.rm = TRUE)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"table object including zero cells","code":""},{"path":"https://nutriverse.io/nipnTK/reference/fullTable.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Fill out a one-dimensional table to include a specified range of values — fullTable","text":"","code":"# Generate some artificial data and then apply `fullTable()` set.seed(0) finalDigits <- sample(x = 0:9, size = 1000, replace = TRUE) fullTable(finalDigits) #>   0   1   2   3   4   5   6   7   8   9  #>  95  80  96 102 106  98 109  95 109 110"},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":null,"dir":"Reference","previous_headings":"","what":"Green's Index of Dispersion — greensIndex","title":"Green's Index of Dispersion — greensIndex","text":"Implementation Green's Index Dispersion bootstrap. sampling distribution Green's Index well described hence bootstrapping used test whether distribution cases across primary sampling units random.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Green's Index of Dispersion — greensIndex","text":"","code":"greensIndex(data, psu, case, replicates = 999)"},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Green's Index of Dispersion — greensIndex","text":"data Survey dataset (R data.frame) psu Name variable holding PSU (cluster) data character vector length = 1 (e.g. psu) case Name variable holding case status character vector length = 1 (e.g. GAM). 
function assumes case status coded 1 = case replicates Number bootstrap replicates (default 9999)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Green's Index of Dispersion — greensIndex","text":"list class GI names:","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Green's Index of Dispersion — greensIndex","text":"value Green's Index can range -1/(n - 1) maximum uniformity (specific dataset) one maximum clumping. interpretation Green’s Index straightforward:","code":""},{"path":"https://nutriverse.io/nipnTK/reference/greensIndex.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Green's Index of Dispersion — greensIndex","text":"","code":"# Apply Green's Index using anthropometric data from a SMART survey in Sudan # (flag.ex01) svy <- flag.ex01 svy$flag <- 0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6),                    svy$flag + 1, svy$flag) svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < -5 | svy$whz > 5),                    svy$flag + 2, svy$flag) svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < -6 | svy$waz > 5),                    svy$flag + 4, svy$flag) svy <- svy[svy$flag == 0, ] svy$stunted <- ifelse(svy$haz < -2, 1, 2)  ## set seed to 0 to replicate results set.seed(0) greensIndex(data = svy, psu = \"psu\", case = \"stunted\") #>  #> \tGreen's Index of Dispersion #>  #> Green's Index (GI) of Dispersion  = -0.0014, 95% CI = (-0.0021, -0.0005) #> Maximum uniformity for this data  = -0.0035 #>                          p-value  =  0.0030 #>"},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":null,"dir":"Reference","previous_headings":"","what":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — 
histNormal","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"Histogram normal curve superimposed help “-eye” assessments normality distribution","code":""},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"","code":"histNormal(   x,   xlab = deparse(substitute(x)),   ylab = \"Frequency\",   main = deparse(substitute(x)),   breaks = \"Sturges\",   ylim = NULL )"},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"x numeric vector xlab x-axis label ylab y-axis label main Plot title breaks Passed hist() function (?hist details) ylim y-axis limits","code":""},{"path":"https://nutriverse.io/nipnTK/reference/histNormal.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Histogram with normal curve superimposed to help with “by-eye” assessments\nof normality of distribution — histNormal","text":"","code":"# histNormal() with data from a SMART survey in Kabul, Afghanistan # (dist.ex01) svy <- dist.ex01 histNormal(svy$muac)  histNormal(svy$haz)  histNormal(svy$waz)  histNormal(svy$whz)"},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":null,"dir":"Reference","previous_headings":"","what":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"Add SMART flags stratified sample survey (e.g. 
MICS, DHS, national SMART)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"","code":"national.SMART(x, strata, indices = c(\"haz\", \"whz\", \"waz\"))"},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"x Survey dataset (R data.frame) indices present strata Name column x defines strata indices Names columns x containing indices","code":""},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add SMART flags to a stratified sample survey (e.g. MICS, DHS, national\nSMART) — national.SMART","text":"data.frame structure x flagSMART column added. column coded using sums powers two","code":""},{"path":"https://nutriverse.io/nipnTK/reference/national.SMART.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add SMART flags to a stratified sample survey (e.g. 
MICS, DHS, national\nSMART) — national.SMART","text":"","code":"# Use the national.SMART() function to flag indices from a national # SMART survey in Nigeria (flag.ex03) svy <- flag.ex03 svyFlagged <- national.SMART(x = svy, strata = \"state\")  # Exclude records with flagging codes relevant to whz: svyFlagged <- svyFlagged[!(svyFlagged$flagSMART %in% c(2, 3, 6, 7)), ]"},{"path":"https://nutriverse.io/nipnTK/reference/nipnTK.html","id":null,"dir":"Reference","previous_headings":"","what":"NiPN data quality toolkit — nipnTK","title":"NiPN data quality toolkit — nipnTK","text":"library R functions assessing data-quality nutritional anthropometry surveys.","code":""},{"path":[]},{"path":"https://nutriverse.io/nipnTK/reference/nipnTK.html","id":"author","dir":"Reference","previous_headings":"","what":"Author","title":"NiPN data quality toolkit — nipnTK","text":"Maintainer: Ernest Guevarra ernest@guevarra.io (ORCID) Authors: Mark Myatt mark@brixtonhealth.com (ORCID)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":null,"dir":"Reference","previous_headings":"","what":"Mahalanobis distance to detect bivariate outliers — outliersMD","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"Mahalanobis distance detect bivariate outliers","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"","code":"outliersMD(x, y, alpha = 0.001)"},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"x Numeric vector y Numeric vector alpha Critical alpha value detect outlier (defaults 
0.001)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"logical vector (TRUE outlier p < alpha)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersMD.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Mahalanobis distance to detect bivariate outliers — outliersMD","text":"","code":"# Use outliersMD() to detect outliers in an anthropometric data from # a SMART survey from the Democratic Republic of Congo (sp.ex01) svy <- sp.ex01 svy[outliersMD(svy$height,svy$weight), ] #>    age sex weight height muac oedema #> 1   54   1   20.5  111.5  180      2 #> 6   48   2   18.6   95.3  171      1 #> 16  30   1   16.9   92.5  188      2 #> 62  55   1   15.1  118.0  156      2 #> 66  56   1   15.0  115.0  148      2"},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":null,"dir":"Reference","previous_headings":"","what":"IQR to detect univariate outliers — outliersUV","title":"IQR to detect univariate outliers — outliersUV","text":"IQR detect univariate outliers","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"IQR to detect univariate outliers — outliersUV","text":"","code":"outliersUV(x, fence = 1.5)"},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"IQR to detect univariate outliers — outliersUV","text":"x Numeric vector fence IQR multiplier (defaults 1.5)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"IQR to detect univariate outliers — outliersUV","text":"logical vector (TRUE 
outlier)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/outliersUV.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"IQR to detect univariate outliers — outliersUV","text":"","code":"# Use outliersUV() to detect univariate outliers in an anthropometric # dataset from a SMART survey from Angola (rl.ex01) svy <- rl.ex01 svy[outliersUV(svy$muac), ] #>  #> Univariate outliers : Lower fence = 98, Upper fence = 178 #>  #>     age sex weight height  muac oedema #> 33   24   1    9.8   74.5 180.0      2 #> 93   12   2    6.7   67.0  96.0      1 #> 126  16   2    9.0   74.6 999.0      2 #> 135  18   2    8.5   74.5 999.0      2 #> 194  24   M    7.0   75.0  95.0      2 #> 227   8   M    6.2   66.0  11.1      2 #> 253  35   2    7.6   75.6  97.0      2 #> 381  24   1   10.8   82.8  12.4      2 #> 501  36   2   15.5   93.4 185.0      2 #> 594  21   2    9.8   76.5  13.2      2 #> 714  59   2   18.9   98.5 180.0      2 #> 752  48   2   15.6  102.2 999.0      2 #> 756  59   1   19.4  101.1 180.0      2 #> 873  59   1   20.6  109.4 179.0      2"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":null,"dir":"Reference","previous_headings":"","what":"Plot helper function for ageChildren() function — plot.ageChildren","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"Plot helper function ageChildren() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"","code":"# S3 method for ageChildren plot(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"x Object resulting applying 
ageChildren() function ... Additional barplot() graphical parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"Bar plot comparing table observed counts vs table expected counts","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageChildren.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Plot helper function for ageChildren() function — plot.ageChildren","text":"","code":"# Plot Chi-Squared test for age of children in dp.ex02 sample dataset using # an u5mr of 1 / 10,000 / day. svy <- dp.ex02 ac <- ageChildren(svy$age, u5mr = 1) plot(ac)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":null,"dir":"Reference","previous_headings":"","what":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"plot() helper functions ageHeaping() functions","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"","code":"# S3 method for ageHeaping plot(x, main = \"\", xlab = \"Remainder\", ylab = \"Frequency\", cex = 0.75, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"x Object resulting applying ageHeaping() function main Title plot xlab x-axis label; default Remainder ylab y-axis label; default Frequency cex Character expansion (numeric); default 0.75 ... 
Additional plot() graphical parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"Barplot frequency remainders age divided specified divisor","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.ageHeaping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"plot() helper functions for ageHeaping() functions — plot.ageHeaping","text":"","code":"# Plot age heaping test results on SMART survey data in Kabul, Afghanistan # (dp.ex02) using a divisor of 12 svy <- dp.ex02 ah12 <- ageHeaping(svy$age) plot(ah12)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":null,"dir":"Reference","previous_headings":"","what":"plot() helper function for digitPreference() function — plot.digitPreference","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"plot() helper function digitPreference() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"","code":"# S3 method for digitPreference plot(x, main = \"\", xlab = \"Final Digit\", ylab = \"Frequency\", cex = 0.75, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"x Object resulting applying digitPreference() function. main Title plot xlab x-axis label; default \"Final Digit\" ylab y-axis label; default \"Frequency\" cex Character expansion; default 0.75 ... 
Additional plot() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"Plotted output digitPreference() function comparing frequencies various final digits","code":""},{"path":"https://nutriverse.io/nipnTK/reference/plot.digitPreference.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"plot() helper function for digitPreference() function — plot.digitPreference","text":"","code":"# Plot output of digit preference test applied to anthropometric data from a # single state from a DHS survey in a West African country svy <- dp.ex01 digitPreference(svy$wt, digits = 1) #>  #> \tDigit Preference Score #>  #> data:\tsvy$wt #> Digit Preference Score (DPS) = 11.86 (Good) #>  plot(digitPreference(svy$wt, digits = 1))"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for ageChildren() function — print.ageChildren","title":"print() helper function for ageChildren() function — print.ageChildren","text":"print() helper function ageChildren() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for ageChildren() function — print.ageChildren","text":"","code":"# S3 method for ageChildren print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for ageChildren() function — print.ageChildren","text":"x Object resulting applying ageChildren() function ... 
Additional print() arguments","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for ageChildren() function — print.ageChildren","text":"Printed output ageChildren() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageChildren.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for ageChildren() function — print.ageChildren","text":"","code":"# Print Chi-Squared test for age of children in dp.ex02 sample dataset using # an u5mr of 1 / 10,000 / day. svy <- dp.ex02 ac <- ageChildren(svy$age, u5mr = 1) print(ac) #>  #> \tAge Test (Children) #>  #> X-squared = 21.4366, df = 4, p = 0.0003 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper functions for ageHeaping() functions — print.ageHeaping","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"print() helper functions ageHeaping() functions","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"","code":"# S3 method for ageHeaping print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"x Object resulting applying ageHeaping() function ... 
Additional print() arguments","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"Printed output ageHeaping() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageHeaping.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper functions for ageHeaping() functions — print.ageHeaping","text":"","code":"# Print age heaping test on SMART survey data in Kabul, Afghanistan (dp.ex02) # using a divisor of 12 svy <- dp.ex02 ah12 <- ageHeaping(svy$age) print(ah12) #>  #> \tAge-heaping Analysis #>  #> data:\tRemainder of svy$age / 12 #> X-squared = 214.9588, df = 11, p-value = 0.0000 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for ageRatioTest() function — print.ageRatioTest","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"print() helper function ageRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"","code":"# S3 method for ageRatioTest print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"x Object resulting applying ageRatioTest() function ... 
Additional print() arguments","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"Printed output ageRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.ageRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for ageRatioTest() function — print.ageRatioTest","text":"","code":"# Print age-ratio test results for survey dataset from Kabul, Afghanistan (dp.ex02) svy <- dp.ex02 print(ageRatioTest(svy$age, ratio = 0.85)) #>  #> \t\tAge Ratio Test (children's data) #>  #>                     Expected age ratio = 0.8500 #> Expected proportion aged 6 - 29 months = 0.4595 #>  #>                     Observed age ratio = 0.8654 #> Observed proportion aged 6 - 29 months = 0.4639 #>  #> X-squared = 0.0531, p = 0.8178 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for digitPreference() function — print.digitPreference","title":"print() helper function for digitPreference() function — print.digitPreference","text":"print() helper function digitPreference() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for digitPreference() function — print.digitPreference","text":"","code":"# S3 method for digitPreference print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for digitPreference() function — print.digitPreference","text":"x Object resulting applying digitPreference() 
function. ... Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for digitPreference() function — print.digitPreference","text":"Printed output digitPreference() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.digitPreference.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for digitPreference() function — print.digitPreference","text":"","code":"# Print output of digit preference test applied to anthropometric data from a #single state from a DHS survey in a West African country svy <- dp.ex01 print(digitPreference(svy$wt, digits = 1)) #>  #> \tDigit Preference Score #>  #> data:\tsvy$wt #> Digit Preference Score (DPS) = 11.86 (Good) #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for print.greensIndex() function — print.greensIndex","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"print() helper function print.greensIndex() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"","code":"# S3 method for greensIndex print(x, ...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"x Object resulting applying greensIndex() function ... 
Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"Printed output greensIndex() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.greensIndex.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for print.greensIndex() function — print.greensIndex","text":"","code":"# Apply Green's Index using anthropometric data from a SMART survey in Sudan # (flag.ex01) svy <- flag.ex01 svy$flag <- 0 svy$flag <- ifelse(!is.na(svy$haz) & (svy$haz < -6 | svy$haz > 6), svy$flag + 1, svy$flag) svy$flag <- ifelse(!is.na(svy$whz) & (svy$whz < -5 | svy$whz > 5), svy$flag + 2, svy$flag) svy$flag <- ifelse(!is.na(svy$waz) & (svy$waz < -6 | svy$waz > 5), svy$flag + 4, svy$flag) svy <- svy[svy$flag == 0, ] svy$stunted <- ifelse(svy$haz < -2, 1, 2) gi <- greensIndex(data = svy, psu = \"psu\", case = \"stunted\") print(gi) #>  #> \tGreen's Index of Dispersion #>  #> Green's Index (GI) of Dispersion  = -0.0013, 95% CI = (-0.0021, -0.0004) #> Maximum uniformity for this data  = -0.0035 #>                          p-value  =  0.0030 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for sexRatioTest() function — print.sexRatioTest","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"print() helper function sexRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"","code":"# S3 method for sexRatioTest print(x, 
...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"x Output resulting applying sexRatioTest() function ... Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"Printed output sexRatioTest() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.sexRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for sexRatioTest() function — print.sexRatioTest","text":"","code":"# Use sexRatioTest() on household roster data from a survey in Tanzania # (as.ex01) and census data of Tanzania extracted from Wolfram|Alpha knowledge # engine (as.ex02) svy <- as.ex01 ref <- as.ex02 censusM <- sum(ref$Males) censusF <- sum(ref$Females) srt <- sexRatioTest(svy$sex, codes = c(1, 2), pop = c(censusM, censusF)) print(srt) #>  #> \tSex Ratio Test #>  #> Expected proportion male = 0.4988 #> Observed proportion male = 0.4914 #> X-squared = 1.8713, p = 0.1713 #>"},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":null,"dir":"Reference","previous_headings":"","what":"print() helper function for skewKurt() function — print.skewKurt","title":"print() helper function for skewKurt() function — print.skewKurt","text":"print() helper function skewKurt() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"print() helper function for skewKurt() function — print.skewKurt","text":"","code":"# S3 method for skewKurt print(x, 
...)"},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"print() helper function for skewKurt() function — print.skewKurt","text":"x Object resulting applying skewKurt() function ... Additional print() parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"print() helper function for skewKurt() function — print.skewKurt","text":"Printed output skewKurt() function","code":""},{"path":"https://nutriverse.io/nipnTK/reference/print.skewKurt.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"print() helper function for skewKurt() function — print.skewKurt","text":"","code":"# Use skewKurt() on an anthropometric data from a SMART survey in # Kabul, Afghanistan (dist.ex01) svy <- dist.ex01 sk <- skewKurt(svy$muac) print(sk) #>  #> \tSkewness and kurtosis #>  #> Skewness = +0.0525\tSE = 0.0828\tz = 0.6348\tp = 0.5256 #> Kurtosis = -0.2412\tSE = 0.1653\tz = 1.4586\tp = 0.1447 #>"},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":null,"dir":"Reference","previous_headings":"","what":"Pyramid plot function for creating population pyramids. — pyramid.plot","title":"Pyramid plot function for creating population pyramids. — pyramid.plot","text":"Pyramid plot function creating population pyramids.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Pyramid plot function for creating population pyramids. 
— pyramid.plot","text":"","code":"pyramid.plot(   x,   g,   main = paste(\"Pyramid plot of\", deparse(substitute(x)), \"by\", deparse(substitute(g))),   xlab = paste(deparse(substitute(g)), \"(\", levels(as.factor(g))[1], \"/\",     levels(as.factor(g))[2], \")\"),   ylab = deparse(substitute(x)),   col = \"white\",   ... )"},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Pyramid plot function for creating population pyramids. — pyramid.plot","text":"x Vector ages (usually grouped) g Vector groups (usually sex) main Plot title xlab x-axis label ylab y-axis label col Colours bars. Either single colour (default col = \"white\") bars, two colours (e.g. col = c(\"lightblue\", \"pink\")) left hand side bars right hand side bars respectively, many colours allocated checkerboard basis bar ... graphical parameters","code":""},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Pyramid plot function for creating population pyramids. — pyramid.plot","text":"table x g (invisible)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/pyramid.plot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Pyramid plot function for creating population pyramids. 
— pyramid.plot","text":"","code":"# Use pyramid.plot() on anthropometric data from a SMART survey in # Kabul, Afghanistan (dp.ex02) svy <- dp.ex02 pyramid.plot(svy$age, svy$sex)"},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":null,"dir":"Reference","previous_headings":"","what":"Normal quantile-quantile plot — qqNormalPlot","title":"Normal quantile-quantile plot — qqNormalPlot","text":"Normal quantile-quantile plot","code":""},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Normal quantile-quantile plot — qqNormalPlot","text":"","code":"qqNormalPlot(x)"},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Normal quantile-quantile plot — qqNormalPlot","text":"x numeric vector","code":""},{"path":"https://nutriverse.io/nipnTK/reference/qqNormalPlot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Normal quantile-quantile plot — qqNormalPlot","text":"","code":"# qqNormalPlot() with data from a SMART survey in Kabul, Afghanistan # (dist.ex01) svy <- dist.ex01 qqNormalPlot(svy$muac)  qqNormalPlot(svy$haz)  qqNormalPlot(svy$waz)  qqNormalPlot(svy$whz)"},{"path":"https://nutriverse.io/nipnTK/reference/rl.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for checking ranges and legal values — rl.ex01","title":"Example dataset for checking ranges and legal values — rl.ex01","text":"Anthropometric data SMART survey Angola.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/rl.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for checking ranges and legal values — 
rl.ex01","text":"","code":"rl.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/rl.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for checking ranges and legal values — rl.ex01","text":"data frame 906 observations 6 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":null,"dir":"Reference","previous_headings":"","what":"Sex Ratio Test — sexRatioTest","title":"Sex Ratio Test — sexRatioTest","text":"Sex Ratio Test","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Sex Ratio Test — sexRatioTest","text":"","code":"sexRatioTest(sex, codes = c(1, 2), pop = c(1, 1))"},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Sex Ratio Test — sexRatioTest","text":"sex Numeric vector (sex) codes Codes used identify males females (order) pop Relative populations males females (order)","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Sex Ratio Test — sexRatioTest","text":"list class \"sexRatioTest\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sexRatioTest.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Sex Ratio Test — sexRatioTest","text":"","code":"# Use sexRatioTest() on household roster data from a survey in Tanzania # (as.ex01) and census data of Tanzania extracted from Wolfram|Alpha knowledge # engine (as.ex02) svy <- as.ex01 ref <- as.ex02 censusM <- sum(ref$Males) censusF <- sum(ref$Females) sexRatioTest(svy$sex, codes = c(1, 2), pop = c(censusM, censusF)) #>  #> \tSex Ratio Test #>  #> Expected proportion male = 0.4988 #> Observed proportion male = 0.4914 #> X-squared = 1.8713, p = 0.1713 
#>"},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":null,"dir":"Reference","previous_headings":"","what":"Skew and kurtosis — skewKurt","title":"Skew and kurtosis — skewKurt","text":"Skew kurtosis","code":""},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Skew and kurtosis — skewKurt","text":"","code":"skewKurt(x)"},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Skew and kurtosis — skewKurt","text":"x Numeric vector","code":""},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Skew and kurtosis — skewKurt","text":"list class \"skewKurt\" :","code":""},{"path":"https://nutriverse.io/nipnTK/reference/skewKurt.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Skew and kurtosis — skewKurt","text":"","code":"# Use skewKurt() on an anthropometric data from a SMART survey in # Kabul, Afghanistan (dist.ex01) svy <- dist.ex01 skewKurt(svy$muac) #>  #> \tSkewness and kurtosis #>  #> Skewness = +0.0525\tSE = 0.0828\tz = 0.6348\tp = 0.5256 #> Kurtosis = -0.2412\tSE = 0.1653\tz = 1.4586\tp = 0.1447 #>"},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex01.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for using scatterplots to identify outliers — sp.ex01","title":"Example dataset for using scatterplots to identify outliers — sp.ex01","text":"Anthropometric data SMART survey Democratic Republic Congo.","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex01.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for using scatterplots to identify outliers — 
sp.ex01","text":"","code":"sp.ex01"},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex01.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for using scatterplots to identify outliers — sp.ex01","text":"data frame 895 observations 6 variables","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex02.html","id":null,"dir":"Reference","previous_headings":"","what":"Example dataset for using scatterplots to identify outliers — sp.ex02","title":"Example dataset for using scatterplots to identify outliers — sp.ex02","text":"Anthropometric data survey school-age (.e., 5 15 years) children Pakistan","code":""},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex02.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Example dataset for using scatterplots to identify outliers — sp.ex02","text":"","code":"sp.ex02"},{"path":"https://nutriverse.io/nipnTK/reference/sp.ex02.html","id":"format","dir":"Reference","previous_headings":"","what":"Format","title":"Example dataset for using scatterplots to identify outliers — sp.ex02","text":"data frame 849 observations 9 variables","code":""},{"path":"https://nutriverse.io/nipnTK/news/index.html","id":"nipntk-0119000","dir":"Changelog","previous_headings":"","what":"nipnTK 0.1.1.9000","title":"nipnTK 0.1.1.9000","text":"Second release nipnTK. GitHub-development release. 
release:","code":""},{"path":"https://nutriverse.io/nipnTK/news/index.html","id":"general-updates-0-1-1-9000","dir":"Changelog","previous_headings":"","what":"General updates","title":"nipnTK 0.1.1.9000","text":"remove appveyor.yml Appveyor ci/cd workflow update GitHub Actions workflow latest 5 system standard check update GitHub Actions workflow coverage testing change default git branch name master main add CITATION update CONTRIBUTOR guidelines upgrade website bootstrap 5 edit typo vignette","code":""},{"path":"https://nutriverse.io/nipnTK/news/index.html","id":"nipntk-010","dir":"Changelog","previous_headings":"","what":"nipnTK 0.1.0","title":"nipnTK 0.1.0","text":"CRAN release: 2020-11-30 first CRAN release nipnTK.","code":""}]
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 8b5ae32..d0c9780 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,10 +1,11 @@
 Anthro
 AnthroPlus
-AppVeyor
 Appveyor
 BAZ
 BMI
+CMD
 CodeFactor
+Codecov
 DHS
 DPS
 Dadaab
@@ -19,6 +20,7 @@ HelpAge
 Hense
 Kuulasmaa
 LCL
+Lifecycle
 MADs
 MUAC
 NCHS
@@ -47,7 +49,6 @@ bpqa
 cd
 cex
 ci
-codecov
 cran
 csv
 demispan
@@ -65,7 +66,6 @@ ht
 htm
 kgs
 kwashiorkor
-lifecycle
 mads
 minGI
 monica
diff --git a/vignettes/ad.Rmd b/vignettes/ad.Rmd
index 0d94ce4..fd95344 100644
--- a/vignettes/ad.Rmd
+++ b/vignettes/ad.Rmd
@@ -414,7 +414,7 @@ as the tall central columns that exceed the expected values shown by the overlai
 Skew and kurtosis are both used in SMART plausibility checks. Table below shows how skew and kurtosis statistics are applied by SMART.
 
 ```{r tab2, echo = FALSE, eval = TRUE}
-col1 <- c("< 0.2", "≥ 0.2 and < 0.4", "≥ 0.6 and < 0.6", "≥ 0.6")
+col1 <- c("< 0.2", "≥ 0.2 and < 0.4", "≥ 0.4 and < 0.6", "≥ 0.6")
 col2 <- c("Excellent", "Good", "Acceptable", "Problematic")
 
 tab <- data.frame(col1, col2)