7.8 Read Strings from a File

20180604 There may be occasions where we would like to load a dataset from a file as a vector of strings, with each line of the file becoming one string. We achieve this using the function base::readLines(). In the following example we access the system file weather.csv that is provided by the rattle (G. Williams 2020) package.

library(glue)         # Format strings: glue().
library(magrittr)     # Pipelines: %>% and the tee pipe %T>%.

dsname <- "weather"   # Dataset name.
ftype  <- "csv"       # Source dataset file type.
fname  <- glue("{dsname}.{ftype}")

# Echo the file name (the tee pipe %T>% passes fname on unchanged),
# locate the file within the rattle package, and read it into a
# character vector with one element per line of the file.
# mustWork=TRUE makes system.file() raise an error if the file cannot
# be found, rather than handing an empty path on to readLines().
fname %T>%
  print() %>%
  system.file(ftype, ., package="rattle", mustWork=TRUE) %>%
  readLines() ->
ds
## weather.csv

Show the first few lines using utils::head():

# Preview the first six lines; six is the default for head().
head(ds, n=6)
## [1] "\"Date\",\"Location\",\"MinTemp\",\"MaxTemp\",\"Rainfall\",\"Evaporation\",\"Sunshine\",\"WindGustDir\",\"WindGustSpeed\",\"WindDir9am\",\"WindDir3pm\",\"WindSpeed9am\",\"WindSpeed3pm\",\"Humidity9am\",\"Humidity3pm\",\"Pressure9am\",\"Pressure3pm\",\"Cloud9am\",\"Cloud3pm\",\"Temp9am\",\"Temp3pm\",\"RainToday\",\"RISK_MM\",\"RainTomorrow\""
## [2] "2007-11-01,\"Canberra\",8,24.3,0,3.4,6.3,\"NW\",30,\"SW\",\"NW\",6,20,68,29,1019.7,1015,7,7,14.4,23.6,\"No\",3.6,\"Yes\""                                                                                                                                                                                                                              
## [3] "2007-11-02,\"Canberra\",14,26.9,3.6,4.4,9.7,\"ENE\",39,\"E\",\"W\",4,17,80,36,1012.4,1008.4,5,3,17.5,25.7,\"Yes\",3.6,\"Yes\""                                                                                                                                                                                                                         
## [4] "2007-11-03,\"Canberra\",13.7,23.4,3.6,5.8,3.3,\"NW\",85,\"N\",\"NNE\",6,6,82,69,1009.5,1007.2,8,7,15.4,20.2,\"Yes\",39.8,\"Yes\""                                                                                                                                                                                                                      
## [5] "2007-11-04,\"Canberra\",13.3,15.5,39.8,7.2,9.1,\"NW\",54,\"WNW\",\"W\",30,24,62,56,1005.5,1007,2,7,13.5,14.1,\"Yes\",2.8,\"Yes\""                                                                                                                                                                                                                      
## [6] "2007-11-05,\"Canberra\",7.6,16.1,2.8,5.6,10.6,\"SSE\",50,\"SSE\",\"ESE\",20,28,68,49,1018.3,1018.5,7,7,11.1,15.4,\"Yes\",0,\"No\""

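Find those strings that contain a specific pattern using base::grep(). By default it returns the indices of the matching strings; with value=TRUE it returns the matching strings themselves.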

# Indices of the lines that contain the pattern.
grep(pattern="ENE", x=ds)
##  [1]   3  10  23  26  28  36  37  42  43  49  50  54  68  69  71  76  86  91  97
## [20] 101 103 106 108 109 110 118 129 132 133 135 138 145 160 171 176 215 222 278
## [39] 303 304 310 323 341 348 351 357 365
# The matching lines themselves rather than their indices.
grep(pattern="ENE", x=ds, value=TRUE)
##  [1] "2007-11-02,\"Canberra\",14,26.9,3.6,4.4,9.7,\"ENE\",39,\"E\",\"W\",4,17,80,36,1012.4,1008.4,5,3,17.5,25.7,\"Yes\",3.6,\"Yes\""  
##  [2] "2007-11-09,\"Canberra\",8.8,19.5,0,4,4.1,\"S\",48,\"E\",\"ENE\",19,17,70,48,1026.1,1022.7,7,7,14.1,18.9,\"No\",16.2,\"Yes\""    
##  [3] "2007-11-22,\"Canberra\",16.4,19.4,0.4,9.2,0,\"E\",26,\"ENE\",\"E\",6,11,88,72,1010.7,1008.9,8,8,16.5,18.3,\"No\",25.8,\"Yes\""  
##  [4] "2007-11-25,\"Canberra\",15.4,28.4,0,4.4,8.1,\"ENE\",33,\"SSE\",\"NE\",9,15,85,31,1022.4,1018.6,8,2,16.8,27.3,\"No\",0,\"No\""   
##  [5] "2007-11-27,\"Canberra\",13.3,22.2,0.2,6.6,2.3,\"ENE\",39,\"E\",\"E\",20,17,70,55,1021,1018.6,7,7,16.5,21.2,\"No\",0,\"No\""     
##  [6] "2007-12-05,\"Canberra\",14.5,21.8,0,8.4,9.8,\"ENE\",43,\"ESE\",\"E\",11,30,69,63,1015,1015.3,7,1,18.6,20.5,\"No\",0,\"No\""     
##  [7] "2007-12-06,\"Canberra\",16.3,26.8,0,6,6.3,\"ENE\",39,\"ESE\",\"ESE\",13,9,78,54,1018.8,1016.1,8,7,18.6,24.5,\"No\",0,\"No\""    
##  [8] "2007-12-11,\"Canberra\",11.8,18.5,0.6,4.8,2.3,\"ENE\",35,\"ESE\",\"E\",9,15,60,63,1018,1017.4,8,6,13.8,16.4,\"No\",0,\"No\""    
##  [9] "2007-12-12,\"Canberra\",11.7,21.5,0,4.2,7.3,\"ENE\",41,\"ESE\",\"E\",15,24,66,51,1021.1,1019.6,7,5,15.7,19.4,\"No\",0,\"No\""   
## [10] "2007-12-18,\"Canberra\",7.5,20.9,0,6.6,8.7,\"ENE\",39,\"SE\",\"E\",13,20,61,43,1020.6,1018.3,6,7,15.3,19.8,\"No\",0,\"No\""     
## [11] "2007-12-19,\"Canberra\",12.8,21,0,6.4,0.8,\"NE\",22,\"NE\",\"ENE\",7,6,72,54,1018,1015.6,8,8,16.1,20,\"No\",3.4,\"Yes\""        
## [12] "2007-12-23,\"Canberra\",9.2,20.4,17.4,7.8,10.2,\"ENE\",39,\"N\",\"N\",9,20,59,49,1009.5,1009.1,1,7,14.9,18.8,\"Yes\",0,\"No\""  
## [13] "2008-01-06,\"Canberra\",14.3,34.1,0,6.6,10.5,\"ENE\",39,\"W\",\"NNW\",6,19,77,34,1006.6,1003.3,1,1,18.9,31.9,\"No\",0,\"No\""   
## [14] "2008-01-07,\"Canberra\",16.5,33.9,0,9,12.6,\"ENE\",39,\"E\",\"NW\",11,11,60,36,1012.3,1009.5,1,1,20.8,31.3,\"No\",0,\"No\""     
## [15] "2008-01-09,\"Canberra\",17.5,29.9,0,6.6,8.8,\"E\",43,\"E\",\"ENE\",13,11,74,45,1018.5,1015.9,8,1,19.3,27.9,\"No\",0,\"No\""     
## [16] "2008-01-14,\"Canberra\",16,22.8,0,12.4,6,\"E\",50,\"E\",\"ENE\",13,19,70,57,1012.3,1012.3,8,6,17,21.6,\"No\",0,\"No\""          
## [17] "2008-01-24,\"Canberra\",10.3,27.8,0,6.4,9.9,\"ENE\",35,\"SE\",\"NE\",6,11,64,29,1021.3,1018.1,3,6,17,26.3,\"No\",0,\"No\""      
## [18] "2008-01-29,\"Canberra\",17.9,33.9,0,10.4,11.8,\"ENE\",46,\"S\",\"W\",6,11,72,22,1017.4,1014.6,1,1,19.8,32.3,\"No\",0,\"No\""    
## [19] "2008-02-04,\"Canberra\",18.2,22.6,1.8,8,0,\"ENE\",33,\"SSE\",\"ENE\",7,13,92,76,1014.4,1011.5,8,8,18.5,22.1,\"Yes\",9,\"Yes\""  
## [20] "2008-02-08,\"Canberra\",12.4,19.9,16.2,5.4,5.6,\"ENE\",41,\"ESE\",\"ESE\",7,20,75,58,1007.6,1005,7,7,13.6,18.7,\"Yes\",0,\"No\""
## [21] "2008-02-10,\"Canberra\",9.1,23.1,0,5.8,9.6,\"ENE\",41,\"SSE\",\"W\",17,13,66,37,1013.6,1011.4,1,7,14,21.4,\"No\",0,\"No\""      
## [22] "2008-02-13,\"Canberra\",12.6,18.2,11,3.2,0.4,\"ENE\",30,\"SSE\",\"SSE\",13,15,81,73,1010,1011,7,8,16.3,16.8,\"Yes\",0.2,\"No\"" 
## [23] "2008-02-15,\"Canberra\",10.8,25.2,0,5.6,12.6,\"ENE\",35,\"SE\",\"E\",7,7,69,32,1020.2,1016.7,1,1,15,24.3,\"No\",0,\"No\""       
## [24] "2008-02-16,\"Canberra\",11.2,26.1,0,7.2,12.6,\"ENE\",39,\"SE\",\"E\",7,19,71,40,1022.9,1020,1,1,15.3,24.7,\"No\",0,\"No\""      
## [25] "2008-02-17,\"Canberra\",12.1,24.1,0,7.4,10.2,\"ENE\",46,\"ESE\",\"NNE\",9,13,64,46,1025.2,1021.4,7,1,15.4,22.5,\"No\",0,\"No\"" 
## [26] "2008-02-25,\"Canberra\",11.5,25.9,0,5.2,10.2,\"ENE\",44,\"ENE\",\"WSW\",9,7,70,35,1016.4,1014.3,6,2,15.8,24.3,\"No\",0,\"No\""  
## [27] "2008-03-07,\"Canberra\",10.8,29.2,0,8.4,7.5,\"E\",50,NA,\"ENE\",NA,9,56,34,1021.6,1017.7,0,7,17.5,27.7,\"No\",3,\"Yes\""        
## [28] "2008-03-10,\"Canberra\",12.5,31.7,0,6.6,11.2,\"WNW\",24,\"ENE\",\"W\",6,9,74,28,1024.9,1020.4,1,1,17.2,30.1,\"No\",0,\"No\""    
## [29] "2008-03-11,\"Canberra\",13.9,34.7,0,6.4,8.5,\"SSW\",46,\"SE\",\"ENE\",11,7,50,18,1022.8,1019.3,0,3,20.2,32.7,\"No\",0.2,\"No\"" 
## [30] "2008-03-13,\"Canberra\",13.2,33.1,0,8.6,9.7,\"ENE\",39,\"SE\",\"NNW\",7,7,91,18,1020.9,1017.7,7,1,16.1,31.7,\"No\",0,\"No\""    
## [31] "2008-03-16,\"Canberra\",11.3,32.3,0,9.4,11.4,\"NE\",28,\"ENE\",\"WNW\",4,6,44,17,1024.3,1020.7,5,2,18.2,30.5,\"No\",0,\"No\""   
## [32] "2008-03-23,\"Canberra\",12.8,24.9,0,2.4,6.2,\"ENE\",30,\"NNW\",\"SW\",6,7,76,44,1023.2,1019.3,7,6,15.5,24.2,\"No\",0.6,\"No\""  
## [33] "2008-04-07,\"Canberra\",8.1,20.5,0,3.8,7.8,\"ENE\",31,\"ESE\",\"ENE\",11,11,66,48,1027.8,1024,1,5,14.9,19.5,\"No\",0,\"No\""    
## [34] "2008-04-18,\"Canberra\",7.9,19.7,0,3.2,8.3,\"ESE\",48,\"SSE\",\"ENE\",17,11,58,43,1027.3,1024.1,3,6,14,19.1,\"No\",0,\"No\""    
## [35] "2008-04-23,\"Canberra\",7.5,19,0,4,6.8,\"ENE\",26,NA,\"ESE\",0,7,74,43,1025.5,1022.2,6,5,12.6,18.1,\"No\",0,\"No\""             
## [36] "2008-06-01,\"Canberra\",0.6,14,0,2.2,2.7,\"NE\",17,NA,\"ENE\",NA,7,99,67,1032.3,1028.9,7,7,4.6,13.9,\"No\",0,\"No\""            
## [37] "2008-06-08,\"Canberra\",4.3,14.5,0,2,3,\"E\",30,\"SSE\",\"ENE\",6,13,76,70,1034.3,1031.7,7,7,7.9,13,\"No\",0,\"No\""            
## [38] "2008-08-03,\"Canberra\",2.3,12.8,0,2.2,9.6,\"WNW\",35,\"ENE\",\"WNW\",4,24,70,48,1021.2,1018.1,1,1,7.8,11.6,\"No\",0,\"No\""    
## [39] "2008-08-28,\"Canberra\",-0.1,14.7,0,3.4,9.9,\"ENE\",30,\"SE\",\"NE\",6,15,60,43,1029.9,1025.9,1,5,7.2,12.3,\"No\",0,\"No\""     
## [40] "2008-08-29,\"Canberra\",-0.2,16.2,0,3.4,5.9,\"E\",28,NA,\"ENE\",0,20,74,45,1027.6,1022.9,7,6,7.3,14.6,\"No\",0,\"No\""          
## [41] "2008-09-04,\"Canberra\",5.4,11.3,0.2,2.2,0.6,\"ENE\",35,\"SE\",\"ESE\",11,17,65,61,1035.7,1031.9,7,7,8.3,10.2,\"No\",0,\"No\""  
## [42] "2008-09-17,\"Canberra\",0.7,14.1,0,5.6,9,\"ENE\",20,\"SSW\",\"NNW\",6,7,69,43,1026.7,1022.1,7,1,7.4,13.7,\"No\",0,\"No\""       
## [43] "2008-10-05,\"Canberra\",10.3,21.3,3,4.2,6.7,\"NNW\",43,\"ENE\",\"N\",7,19,79,46,1018.1,1013.6,8,1,11.7,19.8,\"Yes\",0,\"No\""   
## [44] "2008-10-12,\"Canberra\",4.5,23.9,0,4.8,11.7,\"NW\",30,\"ENE\",\"NNW\",4,11,67,27,1025.8,1021.5,0,4,12.6,22.3,\"No\",0,\"No\""   
## [45] "2008-10-15,\"Canberra\",9.2,19.6,0.6,3.4,10.4,\"ENE\",31,\"SSE\",\"NNW\",4,7,71,42,1022.3,1019.7,7,4,11.6,18.4,\"No\",0,\"No\"" 
## [46] "2008-10-21,\"Canberra\",9,20.6,0,9,6.2,\"ENE\",39,\"S\",\"SW\",11,11,54,28,1022.3,1018.6,7,5,11.4,18.5,\"No\",0.8,\"No\""       
## [47] "2008-10-29,\"Canberra\",12.5,19.9,0,8.4,5.3,\"ESE\",43,\"ENE\",\"ENE\",11,9,63,47,1024,1022.8,3,2,14.5,18.3,\"No\",0,\"No\""

References

Williams, Graham. 2020. Rattle: Graphical User Interface for Data Science in R. https://rattle.togaware.com/.


Your donation will support ongoing development and give you access to the PDF version of this book. Desktop Survival Guides include Data Science, GNU/Linux, and MLHub. Books available on Amazon include Data Mining with Rattle and Essentials of Data Science. Popular open source software includes rattle, wajig, and mlhub. Hosted by Togaware, a pioneer of free and open source software since 1984.
Copyright © 1995-2021 Graham.Williams@togaware.com Creative Commons Attribution-ShareAlike 4.0.