在Python中调用R接口,使用rpy2模块
包括函数与包的调用
# Three ways to reach an R object from Python. They are roughly equivalent to
# treating the `r` instance as a dictionary, as a callable, or as an object
# with attributes.
import rpy2.robjects as robjects

## Way 1: use the r instance like a dictionary
pi = robjects.r['pi']
print(pi)
print(type(pi))
print(pi[0])
# output:
# [1] 3.141593
# <class 'rpy2.robjects.vectors.FloatVector'>
# 3.141592653589793

## Way 2: use the r instance like a function
# This is the most general form: R code of any size can be written as a
# Python string and evaluated with robjects.r('Rcode').
a = robjects.r('a<-c(1,2,3)')
print(type(a))
print(list(a))
print(a[0:])
# output:
# <class 'rpy2.robjects.vectors.FloatVector'>
# [1.0, 2.0, 3.0]
# [1] 1 2 3

## Way 3: use the r instance like an object with attributes
# Attribute access breaks for R names that contain a dot, such as
# data.frame or read.csv.
t2 = robjects.r.pi
print(t2[0])
# output: 3.141592653589793

## Note
# robjects.r("r_script") evaluates R code.
# For R objects such as lists and matrices, Python can pull out part of the
# data with the rx() and rx2() methods.
# For a list, the `names` attribute gives the element names. rx() behaves
# like R's "[" operator (the result is still an R list), while rx2() behaves
# like "[[".

# 1
tmp = robjects.r("list(a = matrix(1:10, nrow = 2), b = 'Hello')")
print(tmp)
# output:
# $a
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    3    5    7    9
# [2,]    2    4    6    8   10
#
# $b
# [1] "Hello"

# 2
print(tmp.names)
# [1] "a" "b"

# 3
print(tmp.rx('a'))
# $a
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    3    5    7    9
# [2,]    2    4    6    8   10

# 4
print(list(tmp.rx('a')))
# [R object with classes: ('matrix',) mapped to: [1, 2, 3, 4, ..., 7, 8, 9, 10]]

# 5
print(tmp.rx(1))
# $a
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    3    5    7    9
# [2,]    2    4    6    8   10

# 6
print(tmp.rx2(1))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    3    5    7    9
# [2,]    2    4    6    8   10

# 7: first element of 'a'
print(tmp.rx2('a').rx(1, 1))
# [1] 1

# 8: first row of 'a' (printed as an R vector, so no [0] indexing here)
print(tmp.rx2('a').rx(1, True))
# [1] 1 3 5 7 9

# 9: the same row as a Python list
print(list(tmp.rx2('a').rx(1, True)))
# [1, 3, 5, 7, 9]

# 10: the same row, kept in a Python variable
b = list(tmp.rx2('a').rx(1, True))
print(b)
# [1, 3, 5, 7, 9]
# 1. Call a user-defined R function
robjects.r(''' f <- function(r){pi * r} ''')
t3 = robjects.r['f'](3)  # 3 is the argument passed to f
print('%.3f' % t3[0])

# A larger example: the R script is kept on separate lines because its `#`
# comments would otherwise swallow the statements that follow them.
r_script = '''
library(randomForest) # 导入随机森林包
## use data set iris
data = iris # 使用鸢尾花数据集
table(data$Species)
## create a randomForest model to classfy the iris species
# 创建随机森林模型给鸢尾花分类
iris.rf <- randomForest(Species~., data = data, importance=T, proximity=T)
print('--------here is the random model-------')
print(iris.rf)
print('--------here is the names of model-----')
print(names(iris.rf))
confusion = iris.rf$confusion
print(confusion)
'''
robjects.r(r_script)

# 2. Call functions that ship with R
# ls() lists the variables in the workspace; it also accepts a pattern,
# e.g. ls(pattern="var") lists the variables whose names match "var".
t4 = robjects.r['ls']()
print(t4[0])

# Another built-in object and function
l = robjects.r['letters']
print(l)
print(len(l))
print(robjects.r['paste'](l, collapse='-'))
# output:
#  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
# [20] "t" "u" "v" "w" "x" "y" "z"
# 26
# [1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"

# An alternative way of reaching R's paste: build the R source text from the
# vector's R representation and evaluate it.
coder = 'paste(%s, collapse = "-")' % (l.r_repr())
print(robjects.r(coder))
# [1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"

# 3. Run an R script file
# NOTE(review): this path is specific to the original author's machine;
# point it at a local script before running.
robjects.r.source('/home/rongzhengqin2/learn/rpy2/test01.r')
x = robjects.r('x')  # fetch variables defined by the script
y = robjects.r('y')
print(x)  # [1] 1 2 3 4   (as recorded with the author's test01.r)
print(y)  # [1] 1 4 9 16

# 4. Load and use an R package
from rpy2.robjects.packages import importr
stats = importr('stats')
print('stats.rnorm(10):', stats.rnorm(10))
# output (random, values differ on every run):
# stats.rnorm(10):  [1] -0.02499746  1.92827632  0.93832232  0.62033276  2.15107656 -0.26479198
#  [7]  0.59109714 -0.34845466  0.32339794 -1.78048630
python对象转换成R对象。 通常,可以将python的list对象转换成为R的vector对象【robjects.ListVector()将python的字典(或list)转换成R的列表】,之后直接使用R函数调用。rpy2提供了几个函数,供我们将python的list转化成R的不同数据类型的vector,对应的函数有 robjects.IntVector(),robjects.FloatVector()等
# Constructors that build a typed R vector from Python data. Each one takes
# the data as its argument, so they are listed here for reference rather than
# called empty-handed:
#   robjects.StrVector      character
#   robjects.IntVector      integer
#   robjects.FloatVector    floating point
#   robjects.ComplexVector  complex
#   robjects.FactorVector   factor
#   robjects.BoolVector     logical
#   robjects.ListVector     list

print(robjects.IntVector([1, 2, 3]))
# [1] 1 2 3

print(robjects.FactorVector(['a', 'a', 'b', 'c']))
# [1] a a b c
# Levels: a b c

print(robjects.FloatVector([1.2, 2.3]))
# [1] 1.2 2.3

print(robjects.baseenv)  # the R base environment
# <environment: base>

# NOTE(review): the recorded output has one row and four columns, presumably
# because the plain Python lists are handed to R as lists rather than integer
# vectors; wrapping each column in robjects.IntVector should give the
# expected 2x2 frame - confirm.
print(robjects.DataFrame({'a': [1, 2], 'b': [3, 4]}))
#   a.1L a.2L b.3L b.4L
# 1    1    2    3    4

testmatrix = robjects.IntVector([1, 2, 3, 4])
print(robjects.r['matrix'](testmatrix, nrow=2))
#      [,1] [,2]
# [1,]    1    3
# [2,]    2    4

t4 = robjects.r['ls']()
print(t4)
推荐使用tuple()或者list()函数,将R对象转换成tuple或者list类型
# Convert R objects to Python containers with tuple() or list().
aa = robjects.r('c(1, 2, 3)')
print(aa)         # [1] 1 2 3
print(str(aa))    # [1] 1 2 3
print(tuple(aa))  # (1.0, 2.0, 3.0)
print(list(aa))   # [1.0, 2.0, 3.0]

# A matrix is flattened in the order its values are stored (see the output).
bb = robjects.r('matrix(1:6, 2, 3)')
print(bb)
#      [,1] [,2] [,3]
# [1,]    1    3    5
# [2,]    2    4    6
print(tuple(bb))  # (1, 2, 3, 4, 5, 6)
print(list(bb))   # [1, 2, 3, 4, 5, 6]
# 1. R function whose parameter has a default value
robjects.r(
"""
testDefault <- function(a=3){
    result = a*2
    ## here should be NOTICE: must be return 'result'. must not return (a*2).
    ## if do, it will error: arg would not be used
    return(result)
}
""")
res_def = robjects.r.testDefault()
res_Notdef = robjects.r.testDefault(robjects.FloatVector([1.2, 2.3]))
print(res_def, res_Notdef)
# [1] 6
# [1] 2.4 4.6

# 2. Pass scalars in and get scalars back
robjects.r(
"""
add <- function(x,y){
    sum_=x+y
    cat('In R:\t',x,'+',y,'=',sum_,sep = ' ')
    return(sum_)
}
""")
x = 4
y = 5
res_int = robjects.r.add(x, y)
print(type(res_int))     # <class 'rpy2.robjects.vectors.IntVector'>
print(type(res_int[0]))  # <class 'int'>
print(x, ' + ', y, ' = ', res_int[0])  # 4 + 5 = 9

robjects.r(
"""
Hello <- function(s){
    reStr="Hello python!!"
    cat('\nIn R:\t',s)
    return(reStr)
}
""")
s = 'Hello R!!'
res_str = robjects.r.Hello(s)
print(type(res_str))  # <class 'rpy2.robjects.vectors.StrVector'>
print(res_str[0])     # Hello python!!

# 3. Pass a one-dimensional array in and get one back
robjects.r("""
szTest <- function(sz){
    cat("\n")
    print(sz)
    cat(typeof(sz),mode(sz),class(sz))#integer numeric integer
    for(i in 1:length(sz)){
        sz[i]=sz[i]+2L
    }
    return(sz)
}
""")
# sz_In = [1, 2, 3]  # passed like this, the argument arrives in R as a list
sz_Int = robjects.IntVector([1, 2, 3])
res_SzInt = robjects.r.szTest(sz_Int)
# In R an integer literal needs the 'L' suffix; without it the value is
# converted to float.
print(type(res_SzInt))
print(res_SzInt)
res_ListInt = list(res_SzInt)
print(res_ListInt)

# 4. Pass a matrix in and get one back
robjects.r("""
matrixTest <- function(mat){
    cat("\n")
    print(mat)
    cat(typeof(mat),mode(mat),class(mat))#integer numeric matrix
    row_=nrow(mat)
    col_=ncol(mat)
    for(i in 1:row_){
        for(j in 1:col_){
            mat[i,j]=mat[i,j]+2L
        }
    }
    return(mat)
}""")
testmatrix = robjects.IntVector([1, 2, 3, 4, 5, 6])
mat_Int = robjects.r['matrix'](testmatrix, nrow=2)
res_MatInt = robjects.r.matrixTest(mat_Int)
print(type(res_MatInt))
# res_MatInt only exists on the Python side, so hand the object to R's dim()
# instead of evaluating the name inside an R code string.
print(robjects.r['dim'](res_MatInt))

# 5. Pass a list in and get one back
robjects.r("""
listTest <- function(list_x){
    cat("\n")
    print(list_x)
    cat(typeof(list_x),mode(list_x),class(list_x))
    list_x[[1]][1]=list_x[[1]][1]+2L
    list_x[[2]][1]=list_x[[2]][1]+2.0
    list_x[[3]][1]=paste(list_x[[3]][1],"add")
    for(i in 1:length(list_x[[4]])){
        list_x[[4]][i]=list_x[[4]][i]+2
    }
    row_=nrow(list_x[[5]])
    col_=ncol(list_x[[5]])
    cat('\nrow_',row_,'col_',col_)
    for(i in 1:row_){
        for(j in 1:col_){
            #print(list_x[[5]][row_*(j-1)+i])
            list_x[[5]][row_*(j-1)+i]=list_x[[5]][row_*(j-1)+i]+2
        }
    }
    return(list_x)
}
""")
testmatrix = robjects.FloatVector([1, 2, 3, 4, 5, 6])
x = robjects.ListVector([
    ('first', 1),
    ('second', 2.0),
    ('third', 'string'),
    ('fouth', robjects.FloatVector([3.0, 4.0, 5.0])),
    ('fifth', robjects.r['matrix'](testmatrix, nrow=2)),
])
res = robjects.r.listTest(x)
print(res)
# Note how the elements of the list returned by R are accessed below.
print(type(res), type(res.rx2('fifth')))
print(res.rx2('first')[0], res.rx2('third')[0], list(res.rx2('fouth')))
# 3 string add [5.0, 6.0, 7.0]
res_Listlist = list(res.rx2('fifth'))
# Original author's note: when printing a ' character, escape it as \'.
print(res_Listlist)
# [3.0, 4.0, 5.0, 6.0, 7.0, 8.0]

# 6. Two ways to build the list data passed to an R function
scalar = 10
# From a dict, if the order of the elements does not matter
seasonal = robjects.ListVector({'order': robjects.IntVector((0, 0, 0)), 'period': scalar})
# From a list of (name, value) pairs, if the order matters
seasonal = robjects.ListVector([('order', robjects.IntVector([0, 0, 0])), ('period', scalar)])
# Convert a pandas DataFrame into an R data frame.
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

pd_df = pd.DataFrame({'int_values': [1, 2, 3], 'str_values': ['abc', 'def', 'ghi']})
print(pd_df)
#    int_values str_values
# 0           1        abc
# 1           2        def
# 2           3        ghi

# Explicit conversion inside a local conversion context.
with localconverter(ro.default_converter + pandas2ri.converter):
    r_from_pd_df = ro.conversion.py2rpy(pd_df)
print(r_from_pd_df)
#   int_values str_values
# 0          1        abc
# 1          2        def
# 2          3        ghi

# 1. The conversion is automatically happening when calling R functions.
#    For example, when calling the R function base::summary:
base = importr('base')
with localconverter(ro.default_converter + pandas2ri.converter):
    df_summary = base.summary(pd_df)  # converted automatically
print(df_summary)
# ['Min.   :1.0  ' '1st Qu.:1.5  ' 'Median :2.0  ' 'Mean   :2.0  '
#  '3rd Qu.:2.5  ' 'Max.   :3.0  ' 'Length:3          '
#  'Class :character  ' 'Mode  :character  ' NA_character_ NA_character_
#  NA_character_]

# 2. Note that a ContextManager is used to limit the scope of the conversion.
#    Without it, rpy2 will not know how to convert a pandas data frame:
try:
    df_summary = base.summary(pd_df)
except NotImplementedError as nie:
    print('NotImplementedError:')
    print(nie)
# Starting from an R data frame this time, convert it to a pandas DataFrame.
r_df = ro.DataFrame({
    'int_values': ro.IntVector([1, 2, 3]),
    'str_values': ro.StrVector(['abc', 'def', 'ghi']),
})
print(r_df)
#   int_values str_values
# 1          1        abc
# 2          2        def
# 3          3        ghi

with localconverter(ro.default_converter + pandas2ri.converter):
    pd_from_r_df = ro.conversion.rpy2py(r_df)
print(pd_from_r_df)
#    int_values str_values
# 1           1        abc
# 2           2        def
# 3           3        ghi
# Convert a DataFrame holding (timezone-naive) timestamps to R.
pd_df = pd.DataFrame({
    'Timestamp': pd.date_range('2017-01-01 00:00:00', periods=10, freq='s')
})
print(pd_df)
#             Timestamp
# 0 2017-01-01 00:00:00
# 1 2017-01-01 00:00:01
# 2 2017-01-01 00:00:02
# 3 2017-01-01 00:00:03
# 4 2017-01-01 00:00:04
# 5 2017-01-01 00:00:05
# 6 2017-01-01 00:00:06
# 7 2017-01-01 00:00:07
# 8 2017-01-01 00:00:08
# 9 2017-01-01 00:00:09

with localconverter(ro.default_converter + pandas2ri.converter):
    r_from_pd_df = ro.conversion.py2rpy(pd_df)
# The timezone used for conversion is the system's default timezone unless
# pandas2ri.default_timezone is specified... or unless the time zone is
# specified in the original time object:
pd_tz_df = pd.DataFrame({
    'Timestamp': pd.date_range('2017-01-01 00:00:00', periods=10, freq='s', tz='UTC')
})
with localconverter(ro.default_converter + pandas2ri.converter):
    r_from_pd_tz_df = ro.conversion.py2rpy(pd_tz_df)
print(r_from_pd_tz_df)
#             Timestamp
# 0 2017-01-01 00:00:00
# 1 2017-01-01 00:00:01
# 2 2017-01-01 00:00:02
# 3 2017-01-01 00:00:03
# 4 2017-01-01 00:00:04
# 5 2017-01-01 00:00:05
# 6 2017-01-01 00:00:06
# 7 2017-01-01 00:00:07
# 8 2017-01-01 00:00:08
# 9 2017-01-01 00:00:09
# 1. From rpy2 to numpy
# R vectors or arrays can be converted to numpy arrays using numpy.array()
# or numpy.asarray().
import numpy as np

ltr = robjects.r.letters
ltr_np = np.asarray(ltr)
print(ltr_np)

# 2. From numpy to rpy2
# The activation (and deactivation) of the automatic conversion of numpy
# objects into rpy2 objects can be made with:
from rpy2.robjects import numpy2ri

# NOTE(review): newer rpy2 releases appear to deprecate activate() and
# deactivate() in favour of a local conversion context - confirm against the
# installed version.
numpy2ri.activate()
numpy2ri.deactivate()