{ "cells": [ { "cell_type": "markdown", "id": "graduate-utility", "metadata": {}, "source": [ "# Fondamentaux de R" ] }, { "cell_type": "markdown", "id": "mineral-wallet", "metadata": {}, "source": [ "## Eléments généraux de programmation" ] }, { "cell_type": "code", "execution_count": 4, "id": "enormous-headset", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T13:47:54.061706Z", "start_time": "2021-03-08T13:47:54.049Z" } }, "outputs": [], "source": [ "# install.packages(c(\"Hmisc\", \"FactoMineR\"))" ] }, { "cell_type": "code", "execution_count": 39, "id": "thrown-mathematics", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:07:33.467850Z", "start_time": "2021-03-08T14:07:33.453Z" }, "code_folding": [] }, "outputs": [], "source": [ "# Example function: report x as a percentage of y.\n", "# Prints 'x is p percent of y' with p rounded to 2 decimals (no trailing newline)\n", "# and invisibly returns p so callers can reuse the value.\n", "# Extra arguments in ... are accepted for backward compatibility and ignored.\n", "percentage <- function(x, y, ...){\n", " result <- (x * 100) / y\n", " rounded_res <- round(result, 2)\n", " cat(x, \"is\", rounded_res, \"percent of\", y)\n", " # cat() returns NULL, so hand back the computed value without auto-printing it\n", " invisible(rounded_res)\n", "}" ] }, { "cell_type": "code", "execution_count": 6, "id": "dominican-captain", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T13:48:04.840792Z", "start_time": "2021-03-08T13:48:04.827Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "24 is 9.38 percent of 256" ] } ], "source": [ "percentage(24,256)" ] }, { "cell_type": "code", "execution_count": 9, "id": "hourly-mechanics", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T13:50:34.066509Z", "start_time": "2021-03-08T13:50:34.052Z" } }, "outputs": [ { "data": { "text/html": [ "3" ], "text/latex": [ "3" ], "text/markdown": [ "3" ], "text/plain": [ "[1] 3" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "nchar(\"a b\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "oriented-reminder", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T13:53:06.166198Z", "start_time": "2021-03-08T13:53:05.469Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "
  1. 'This is an example character vector.'
  2. 'This is another example character vector.'
  3. 'This is yet another example character vector.'
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 'This is an example character vector.'\n", "\\item 'This is another example character vector.'\n", "\\item 'This is yet another example character vector.'\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 'This is an example character vector.'\n", "2. 'This is another example character vector.'\n", "3. 'This is yet another example character vector.'\n", "\n", "\n" ], "text/plain": [ "[1] \"This is an example character vector.\" \n", "[2] \"This is another example character vector.\" \n", "[3] \"This is yet another example character vector.\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# read a remote text file into a character vector, one element per newline-separated field\n", "load_v <- scan(file = \"https://bit.ly/3mlXPoY\", what = \"char\", sep = \"\\n\")\n", "load_v" ] }, { "cell_type": "code", "execution_count": 29, "id": "everyday-wrapping", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:05:37.234074Z", "start_time": "2021-03-08T14:05:37.214Z" } }, "outputs": [ { "data": { "text/html": [ "3" ], "text/latex": [ "3" ], "text/markdown": [ "3" ], "text/plain": [ "[1] 3" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " chr [1:3] \"This is an example character vector.\" ...\n" ] }, { "data": { "text/html": [ "'character'" ], "text/latex": [ "'character'" ], "text/markdown": [ "'character'" ], "text/plain": [ "[1] \"character\"" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# inspect the vector: number of elements, compact structure, storage type\n", "length(load_v)\n", "str(load_v)\n", "typeof(load_v)" ] }, { "cell_type": "code", "execution_count": 31, "id": "alike-anthropology", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:05:50.362379Z", "start_time": "2021-03-08T14:05:50.340Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
A matrix: 3 × 3 of type dbl
BNCCOCAGloWbE
each + N30708141012535316
every + N28481181127857726
each and every + N 140 907 14529
\n" ], "text/latex": [ "A matrix: 3 × 3 of type dbl\n", "\\begin{tabular}{r|lll}\n", " & BNC & COCA & GloWbE\\\\\n", "\\hline\n", "\teach + N & 30708 & 141012 & 535316\\\\\n", "\tevery + N & 28481 & 181127 & 857726\\\\\n", "\teach and every + N & 140 & 907 & 14529\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A matrix: 3 × 3 of type dbl\n", "\n", "| | BNC | COCA | GloWbE |\n", "|---|---|---|---|\n", "| each + N | 30708 | 141012 | 535316 |\n", "| every + N | 28481 | 181127 | 857726 |\n", "| each and every + N | 140 | 907 | 14529 |\n", "\n" ], "text/plain": [ " BNC COCA GloWbE\n", "each + N 30708 141012 535316\n", "every + N 28481 181127 857726\n", "each and every + N 140 907 14529" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# matrix: values are filled column by column (one corpus per column)\n", "values <- c(30708, 28481, 140, 141012, 181127, 907, 535316, 857726, 14529)\n", "row_names <- c(\"each + N\", \"every + N\", \"each and every + N\")\n", "col_names <- c(\"BNC\", \"COCA\", \"GloWbE\")\n", "mat <- matrix(values, nrow = 3, ncol = 3, dimnames = list(row_names, col_names))\n", "mat" ] }, { "cell_type": "code", "execution_count": 34, "id": "thirty-emission", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:06:13.400790Z", "start_time": "2021-03-08T14:06:12.868Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 6 × 9
corpus_filecorpus_file_infomatchintensifierconstructionadjectivesyllable_count_adjNPsyllable_count_NP
<fct><fct><fct><fct><fct><fct><int><fct><int>
1KBF.xmlS conv a quite ferocious mess quite preadjectivalferocious 3mess 1
2AT1.xmlW biographyquite a flirty person quite predeterminerflirty 2person 2
3A7F.xmlW misc a rather anonymous name ratherpreadjectivalanonymous 4name 1
4ECD.xmlW commerce a rather precarious footholdratherpreadjectivalprecarious4foothold2
5B2E.xmlW biographyquite a restless night quite predeterminerrestless 2night 1
6AM4.xmlW misc a rather different turn ratherpreadjectivaldifferent 3turn 1
\n" ], "text/latex": [ "A data.frame: 6 × 9\n", "\\begin{tabular}{r|lllllllll}\n", " & corpus\\_file & corpus\\_file\\_info & match & intensifier & construction & adjective & syllable\\_count\\_adj & NP & syllable\\_count\\_NP\\\\\n", " & & & & & & & & & \\\\\n", "\\hline\n", "\t1 & KBF.xml & S conv & a quite ferocious mess & quite & preadjectival & ferocious & 3 & mess & 1\\\\\n", "\t2 & AT1.xml & W biography & quite a flirty person & quite & predeterminer & flirty & 2 & person & 2\\\\\n", "\t3 & A7F.xml & W misc & a rather anonymous name & rather & preadjectival & anonymous & 4 & name & 1\\\\\n", "\t4 & ECD.xml & W commerce & a rather precarious foothold & rather & preadjectival & precarious & 4 & foothold & 2\\\\\n", "\t5 & B2E.xml & W biography & quite a restless night & quite & predeterminer & restless & 2 & night & 1\\\\\n", "\t6 & AM4.xml & W misc & a rather different turn & rather & preadjectival & different & 3 & turn & 1\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 6 × 9\n", "\n", "| | corpus_file <fct> | corpus_file_info <fct> | match <fct> | intensifier <fct> | construction <fct> | adjective <fct> | syllable_count_adj <int> | NP <fct> | syllable_count_NP <int> |\n", "|---|---|---|---|---|---|---|---|---|---|\n", "| 1 | KBF.xml | S conv | a quite ferocious mess | quite | preadjectival | ferocious | 3 | mess | 1 |\n", "| 2 | AT1.xml | W biography | quite a flirty person | quite | predeterminer | flirty | 2 | person | 2 |\n", "| 3 | A7F.xml | W misc | a rather anonymous name | rather | preadjectival | anonymous | 4 | name | 1 |\n", "| 4 | ECD.xml | W commerce | a rather precarious foothold | rather | preadjectival | precarious | 4 | foothold | 2 |\n", "| 5 | B2E.xml | W biography | quite a restless night | quite | predeterminer | restless | 2 | night | 1 |\n", "| 6 | AM4.xml | W misc | a rather different turn | rather | preadjectival | different | 3 | turn | 1 |\n", "\n" ], "text/plain": [ " corpus_file corpus_file_info match 
intensifier\n", "1 KBF.xml S conv a quite ferocious mess quite \n", "2 AT1.xml W biography quite a flirty person quite \n", "3 A7F.xml W misc a rather anonymous name rather \n", "4 ECD.xml W commerce a rather precarious foothold rather \n", "5 B2E.xml W biography quite a restless night quite \n", "6 AM4.xml W misc a rather different turn rather \n", " construction adjective syllable_count_adj NP syllable_count_NP\n", "1 preadjectival ferocious 3 mess 1 \n", "2 predeterminer flirty 2 person 2 \n", "3 preadjectival anonymous 4 name 1 \n", "4 preadjectival precarious 4 foothold 2 \n", "5 predeterminer restless 2 night 1 \n", "6 preadjectival different 3 turn 1 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# read a data frame from a tab-separated file with a header row\n", "# stringsAsFactors is set explicitly: its default changed from TRUE to FALSE in R 4.0.0,\n", "# and the factor columns in the displayed result rely on TRUE\n", "df <- read.table(\"https://bit.ly/2HzfquJ\", header=TRUE, sep=\"\\t\", stringsAsFactors=TRUE)\n", "head(df)" ] }, { "cell_type": "code", "execution_count": 35, "id": "olive-angle", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:06:25.766738Z", "start_time": "2021-03-08T14:06:25.741Z" } }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
A data.frame: 4 × 3
sizevarietyperiod
<dbl><fct><fct>
BNC 100GB1980s-1993
COCA 450US1990-2012
Hansard1600GB1803-2005
Strathy 50CA1970s-2000s
\n" ], "text/latex": [ "A data.frame: 4 × 3\n", "\\begin{tabular}{r|lll}\n", " & size & variety & period\\\\\n", " & & & \\\\\n", "\\hline\n", "\tBNC & 100 & GB & 1980s-1993 \\\\\n", "\tCOCA & 450 & US & 1990-2012 \\\\\n", "\tHansard & 1600 & GB & 1803-2005 \\\\\n", "\tStrathy & 50 & CA & 1970s-2000s\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "A data.frame: 4 × 3\n", "\n", "| | size <dbl> | variety <fct> | period <fct> |\n", "|---|---|---|---|\n", "| BNC | 100 | GB | 1980s-1993 |\n", "| COCA | 450 | US | 1990-2012 |\n", "| Hansard | 1600 | GB | 1803-2005 |\n", "| Strathy | 50 | CA | 1970s-2000s |\n", "\n" ], "text/plain": [ " size variety period \n", "BNC 100 GB 1980s-1993 \n", "COCA 450 US 1990-2012 \n", "Hansard 1600 GB 1803-2005 \n", "Strathy 50 CA 1970s-2000s" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# build a data frame by hand; corpus names become the row names\n", "corpus <- c(\"BNC\", \"COCA\", \"Hansard\", \"Strathy\")\n", "size <- c(100, 450, 1600, 50)\n", "variety <- c(\"GB\", \"US\", \"GB\", \"CA\")\n", "period <- c(\"1980s-1993\", \"1990-2012\", \"1803-2005\", \"1970s-2000s\")\n", "# stringsAsFactors is explicit because its default changed from TRUE to FALSE in R 4.0.0;\n", "# variety and period are displayed as factors in the result\n", "df.manual <- data.frame(size, variety, period, row.names = corpus, stringsAsFactors = TRUE)\n", "df.manual" ] }, { "cell_type": "code", "execution_count": 36, "id": "noticed-reward", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:06:36.008237Z", "start_time": "2021-03-08T14:06:35.989Z" } }, "outputs": [], "source": [ "# read and save R data (rds)\n", "# saveRDS(df.manual.2, file=\"/pathtoyour/workingdirectory/df.manual.2.rds\") \n", "# readRDS(\"/pathtoyour/workingdirectory/df.manual.2.rds\") # Mac" ] }, { "cell_type": "code", "execution_count": 37, "id": "efficient-computer", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:06:43.892457Z", "start_time": "2021-03-08T14:06:43.876Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1] \"a\"\n", "[1] \"b\"\n", "[1] \"c\"\n", "[1] \"d\"\n", "[1] \"e\"\n" ] } ], "source": [ "# for-loop example\n", "for (i in 
letters[1:5]) print(i)" ] }, { "cell_type": "code", "execution_count": 38, "id": "assigned-greenhouse", "metadata": { "ExecuteTime": { "end_time": "2021-03-08T14:06:54.650425Z", "start_time": "2021-03-08T14:06:54.628Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1] \"-7 -> negative\"\n", "[1] \"-4 -> negative\"\n", "[1] \"-1 -> negative\"\n", "[1] \"0 -> zero\"\n", "[1] \"3 -> positive\"\n", "[1] \"12 -> positive\"\n", "[1] \"14 -> positive\"\n" ] } ], "source": [ "# example: if / else if / else, classifying each number by sign\n", "integer <- c(-7, -4, -1, 0, 3, 12, 14)\n", "# seq_along() gives an empty sequence for a zero-length vector, whereas\n", "# 1:length(x) would iterate over c(1, 0) and fail on the missing elements\n", "for (i in seq_along(integer)) {\n", " if (integer[i] < 0) print(paste(integer[i], \"-> negative\")) # first if statement\n", " else if (integer[i] == 0) print(paste(integer[i], \"-> zero\")) # second (nested) if statement\n", " else print(paste(integer[i], \"-> positive\"))\n", "}" ] }, { "cell_type": "markdown", "id": "ranking-sunrise", "metadata": {}, "source": [ "## References\n", "\n", "Desagulier, Guillaume. 2016. “A Lesson from Associative Learning: Asymmetry and Productivity in Multiple-Slot Constructions.” Journal Article. Corpus Linguistics and Linguistic Theory 12 (1).\n", "\n", "Haspelmath, Martin. 2011. 
“The Indeterminacy of Word Segmentation and the Nature of Morphology and Syntax.” Folia Linguistica 45 (1): 31–80.\n", "\n", "https://corpling.modyco.fr/workshops/M2TAL/1.R.fundamentals.html#2_R_scripts" ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.6.3" }, "nbTranslate": { "displayLangs": [ "*" ], "hotkey": "alt-t", "langInMainMenu": true, "sourceLang": "en", "targetLang": "fr", "useGoogleTranslate": true }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": false, "skip_h1_title": true, "title_cell": "Table des matières", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }