目录

rpy2接口教程


在Python中调用R接口,需要使用rpy2模块。


1. python调用R对象

包括函数与包的调用

snippet.python
#有三种方法调用R对象,分别“相当于”把r实例当作字典、把r实例当作方法、把r实例当作一个类对象
import rpy2.robjects as robjects
 
## 第一种, 把r实例当作字典
pi = robjects.r['pi']
print(pi)
print(type(pi))
print(pi[0])
output:
  [1] 3.141593
  <class 'rpy2.robjects.vectors.FloatVector'>
  3.141592653589793
 
## 第二种, 把r实例当作方法
# 这种方法从某种程度上讲是万能的,因为可以将任意大小和长度的R代码写成一个python字符串,之后通过robjects.r('Rcode')调用执行
a = robjects.r('a<-c(1,2,3)')
print(type(a))
print(list(a))
print(a[0:])
output:
  <class 'rpy2.robjects.vectors.FloatVector'>
  [1.0, 2.0, 3.0]
  [1] 1 2 3
 
## 第三种,把r实例当作一个类对象
t2=robjects.r.pi # 这种方法对于名称中有“点号”的变量会出问题,比如data.frame/read.csv等
print(t2[0])
output:
  3.141592653589793
 
## Note
#要注意:robjects.r("r_script") 可以执行r代码
#对于一些特殊的R对象比如list和matrix,如果python要调取其中的部分数据,可以通过其rx()和rx2()方法操作。
#对于list,可以查看其names属性,以获得列表各个元素的名称。rx()相当于"["操作(注意取出的仍是R的list对象),而rx2()相当于"[["操作。如下:
#1
tmp = robjects.r("list(a = matrix(1:10, nrow = 2), b = 'Hello')")
print(tmp)
"""
output:
$a
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    3    5    7    9
[2,]    2    4    6    8   10
 
$b
[1] "Hello"
"""
#2
print(tmp.names)
[1] "a" "b"
#3
print(tmp.rx('a'))
$a
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    3    5    7    9
[2,]    2    4    6    8   10
#4
print(list(tmp.rx('a')))
[R object with classes: ('matrix',) mapped to:
[1, 2, 3, 4, ..., 7, 8, 9, 10]]
 
#5
print(tmp.rx(1))
$a
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    3    5    7    9
[2,]    2    4    6    8   10
 
#6
print(tmp.rx2(1))
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    3    5    7    9
[2,]    2    4    6    8   10
 
#7
print(tmp.rx2('a').rx(1, 1)) # first element of 'a'
[1] 1
 
#8
print(tmp.rx2('a').rx(1, True)) # first row of 'a'
[1] 1 3 5 7 9
 
#9
print(list(tmp.rx2('a').rx(1, True)))
[1, 3, 5, 7, 9]
 
#10
b = [i for i in tmp.rx2('a').rx(1, True)]
print(b)
[1, 3, 5, 7, 9]

2. 调用R函数

snippet.python
# 1. 调用自定义函数
robjects.r('''
        f <- function(r){pi * r}
        ''')
t3=robjects.r['f'](3) # 3 为传递的参数
print('%.3f'%t3[0])
 
# 复杂的例子
r_script = ''' 
library(randomForest) # 导入随机森林包
## use data set iris
data = iris # 使用鸢尾花数据集
table(data$Species)
## create a randomForest model to classfy the iris species
# 创建随机森林模型给鸢尾花分类
iris.rf <- randomForest(Species~., data = data, importance=T, proximity=T)
print('--------here is the random model-------')
print(iris.rf)
print('--------here is the names of model-----')
print(names(iris.rf))
confusion = iris.rf$confusion
print(confusion)
'''
robjects.r(r_script)
 
# 2. 调用R自带的函数
# internal function in R
t4=robjects.r['ls']()#可使用 ls()函数列出工作区中的所有变量;ls() 函数还可以用正则表达式模式匹配变量名称,eg:ls(pattern="var"),列出名称中包含“var”的变量(若只匹配以“var”开头的变量,使用 pattern="^var")
print(t4[0])
 
# another internal function
l = robjects.r['letters']
print(l)
print(len(l))
print(robjects.r['paste'](l, collapse = '-'))
output:
  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
  [20] "t" "u" "v" "w" "x" "y" "z"
  26
  [1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"
 
# an alternative way of getting 'paste' function in R
# eval the R code
coder = 'paste(%s, collapse = "-")' % (l.r_repr())
print(robjects.r(coder))
[1] "a-b-c-d-e-f-g-h-i-j-k-l-m-n-o-p-q-r-s-t-u-v-w-x-y-z"
 
# 3. 执行R的脚本文件
robjects.r.source('/home/rongzhengqin2/learn/rpy2/test01.r')
x = robjects.r('x')#获取脚本里的变量
y = robjects.r('y')
print(x) #[1] 1 2 3 4
print(y) #[1]  1  4  9 16
 
# 4. 载入和使用R包
from rpy2.robjects.packages import importr
stats = importr('stats')
print('stats.rnorm(10):',stats.rnorm(10))
output:
  stats.rnorm(10):  [1] -0.02499746  1.92827632  0.93832232  0.62033276  2.15107656 -0.26479198
   [7]  0.59109714 -0.34845466  0.32339794 -1.78048630

3. Python与R的对象转换

python对象转换成R对象。通常,可以将python的list对象转换成R的vector对象【robjects.ListVector()可将python的字典(或由键值对组成的list)转换成R的列表】,之后直接传给R函数调用。rpy2提供了几个函数,用于将python的list转化成R中不同数据类型的vector,对应的函数有 robjects.IntVector()、robjects.FloatVector()等

snippet.python
robjects.StrVector()#字符
robjects.IntVector()#整数
robjects.FloatVector()#浮点
robjects.ComplexVector()#复数
robjects.FactorVector()#因子
robjects.BoolVector()#布尔向量
robjects.ListVector()#列表
 
print(robjects.IntVector([1,2,3])) #[1] 1 2 3
print(robjects.FactorVector(['a','a','b','c']))
  [1] a a b c
  Levels: a b c
print(robjects.FloatVector([1.2,2.3])) # [1] 1.2 2.3
print(robjects.baseenv) # 基本环境空间 <environment: base>
print(robjects.DataFrame({'a':[1,2],'b':[3,4]}))
  a.1L a.2L b.3L b.4L
1    1    2    3    4
 
testmatrix = robjects.IntVector([1, 2, 3, 4]) 
print(robjects.r['matrix'](testmatrix, nrow = 2)) 
     [,1] [,2]
[1,]    1    3
[2,]    2    4
 
t4=robjects.r['ls']()
print(t4)

4. R对象转换成python对象

推荐使用tuple()或者list()函数,将R对象转换成tuple或者list类型

snippet.python
aa = robjects.r('c(1, 2, 3)')
print(aa)         #[1] 1 2 3
print(str(aa))    #[1] 1 2 3
print(tuple(aa))  #(1.0, 2.0, 3.0)
print(list(aa))   #[1.0, 2.0, 3.0]
 
bb = robjects.r('matrix(1:6, 2, 3)')
print(bb)
         [,1] [,2] [,3]
    [1,]    1    3    5
    [2,]    2    4    6
print(tuple(bb))  #(1, 2, 3, 4, 5, 6)
print(list(bb))   #[1, 2, 3, 4, 5, 6]

5. Python调用R的代码实现

snippet.python
#1 R函数的参数有默认值
robjects.r(
        """ 
        testDefault <- function(a=3){
            result = a*2
            ## here should be NOTICE: must be return 'result'. must not return (a*2). 
            ## if do, it will error: arg would not be used
            return(result)
        }
        """)
res_def =robjects.r.testDefault()
res_Notdef=robjects.r.testDefault(robjects.FloatVector([1.2,2.3]))
print(res_def,res_Notdef)
    [1] 6
    [1] 2.4 4.6
 
 
#2 传递并返回数字
robjects.r(
        """ 
        add <- function(x,y){
            sum_=x+y
            cat('In R:\t',x,'+',y,'=',sum_,sep = ' ')
            return(sum_)
        }
        """)
 
x=4
y=5
res_int =robjects.r.add(x,y)
print(type(res_int)) # <class 'rpy2.robjects.vectors.IntVector'>
print(type(res_int[0])) # <class 'int'>
print(x,' + ',y,' = ',res_int[0]) # 4  +  5  =  9
 
 
robjects.r(
        """ 
        Hello <- function(s){
            reStr="Hello python!!"
            cat('\nIn R:\t',s)
            return(reStr)
        }
        """)
s = 'Hello R!!'
res_str =robjects.r.Hello(s)
print(type(res_str)) # <class 'rpy2.robjects.vectors.StrVector'>
print(res_str[0]) # Hello python!!
 
# 2 传递并返回一维数组
robjects.r("""
        szTest <- function(sz){
            cat("\n")
            print(sz)
            cat(typeof(sz),mode(sz),class(sz))#integer numeric integer
            for(i in 1:length(sz)){
                sz[i]=sz[i]+2L
                }
            return(sz)
        }
        """)
#sz_In=[1,2,3]#如这样传参数,则在R中为list类型
sz_Int=robjects.IntVector([1,2,3])
res_SzInt=robjects.r.szTest(sz_Int)
print(type(res_SzInt))#在R中一定要注意对于int型要在后面加'L',否则会被转化为float
print(res_SzInt)
res_ListInt=list(res_SzInt)
print(res_ListInt)
 
 
robjects.r("""
        matrixTest <- function(mat){
            cat("\n")
            print(mat)
            cat(typeof(mat),mode(mat),class(mat))#integer numeric matrix
            row_=nrow(mat)
            col_=ncol(mat)
            for(i in 1:row_){
                for(j in 1:col_){
                    mat[i,j]=mat[i,j]+2L
                }
            }
            return(mat)
        }""")
 
testmatrix = robjects.IntVector([1, 2, 3, 4,5,6])
mat_Int=robjects.r['matrix'](testmatrix, nrow = 2)
res_MatInt=robjects.r.matrixTest(mat_Int)
print(type(res_MatInt))
print(robjects.r['dim'](res_MatInt)) # res_MatInt 只存在于python中,R环境里没有这个变量,因此要把它作为参数传给R的dim函数
 
robjects.r("""
        listTest <- function(list_x){
            cat("\n")
            print(list_x)
            cat(typeof(list_x),mode(list_x),class(list_x))
            list_x[[1]][1]=list_x[[1]][1]+2L
            list_x[[2]][1]=list_x[[2]][1]+2.0
            list_x[[3]][1]=paste(list_x[[3]][1],"add")
            for(i in 1:length(list_x[[4]])){
                list_x[[4]][i]=list_x[[4]][i]+2
            }
            row_=nrow(list_x[[5]])
            col_=ncol(list_x[[5]])
            cat('\nrow_',row_,'col_',col_)
            for(i in 1:row_){
                for(j in 1:col_){
                    #print(list_x[[5]][row_*(j-1)+i])
                    list_x[[5]][row_*(j-1)+i]=list_x[[5]][row_*(j-1)+i]+2
                }
            }
            return(list_x)
        }
""")
 
 
testmatrix = robjects.FloatVector([1, 2, 3, 4,5,6])
x=robjects.ListVector([('first',1),('second',2.0),('third','string'),('fouth',robjects.FloatVector([ 3.0,4.0,5.0])),('fifth', robjects.r['matrix'](testmatrix, nrow = 2))])
res=robjects.r.listTest(x)
print(res)
print(type(res) ,type(res.rx2('fifth')))
#在这里注意如何获取从R返回的list的各元素
print(res.rx2('first')[0],res.rx2('third')[0],list(res.rx2('fouth'))) # 3 string add [5.0, 6.0, 7.0]
res_Listlist=list(res.rx2('fifth'))
print(res_Listlist)#注意在输出'符号时,使用\'(转义字符) [3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
 
# 3 在传递给R函数list的数据时,可以有两种方法
scalar = 10
# if the order of the element does not matter,如果元素的顺序无关紧要
seasonal = robjects.ListVector({'order': robjects.IntVector((0,0,0)), 'period': scalar})#字典dict
# if the order matters,即如果顺序重要的话
seasonal = robjects.ListVector([('order', robjects.IntVector([0,0,0])), ('period', scalar)])#列表list

6. Pandas to R

snippet.python
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
pd_df = pd.DataFrame({'int_values': [1,2,3],'str_values': ['abc', 'def', 'ghi']})
print(pd_df)
       int_values str_values
    0           1        abc
    1           2        def
    2           3        ghi
with localconverter(ro.default_converter + pandas2ri.converter):
    r_from_pd_df = ro.conversion.py2rpy(pd_df)
print(r_from_pd_df)
       int_values str_values
    0           1        abc
    1           2        def
    2           3        ghi
 
# 1. The conversion happens automatically when calling R functions, for example when calling the R function base::summary:
base = importr('base')
with localconverter(ro.default_converter + pandas2ri.converter):
      df_summary = base.summary(pd_df)   #自动转换
print(df_summary) # ['Min.   :1.0  ' '1st Qu.:1.5  ' 'Median :2.0  ' 'Mean   :2.0  '
     '3rd Qu.:2.5  ' 'Max.   :3.0  ' 'Length:3          ' 'Class :character  '
     'Mode  :character  ' NA_character_ NA_character_ NA_character_]
 
# 2. Note that a ContextManager is used to limit the scope of the conversion. Without it, rpy2 will not know how to convert a pandas data frame:
try:
    df_summary = base.summary(pd_df)
except NotImplementedError as nie:
    print('NotImplementedError:')
    print(nie)

7. R to pandas

snippet.python
#1 Starting from an R data frame this time:
r_df = ro.DataFrame({'int_values': ro.IntVector([1,2,3]),'str_values': ro.StrVector(['abc', 'def', 'ghi'])})
print(r_df)
       int_values str_values
    1           1        abc
    2           2        def
    3           3        ghi
 
with localconverter(ro.default_converter + pandas2ri.converter):
    pd_from_r_df = ro.conversion.rpy2py(r_df)
print(pd_from_r_df)
       int_values str_values
    1           1        abc
    2           2        def
    3           3        ghi

8. Date and time objects

snippet.python
pd_df = pd.DataFrame({
            'Timestamp': pd.date_range('2017-01-01 00:00:00', periods=10, freq='s')
        })  
 
print(pd_df)
                Timestamp
    0 2017-01-01 00:00:00
    1 2017-01-01 00:00:01
    2 2017-01-01 00:00:02
    3 2017-01-01 00:00:03
    4 2017-01-01 00:00:04
    5 2017-01-01 00:00:05
    6 2017-01-01 00:00:06
    7 2017-01-01 00:00:07
    8 2017-01-01 00:00:08
    9 2017-01-01 00:00:09
 
 
with localconverter(ro.default_converter + pandas2ri.converter):
      r_from_pd_df = ro.conversion.py2rpy(pd_df)

9. Timestamp

snippet.python
# The timezone used for conversion is the system’s default timezone, unless pandas2ri.default_timezone is specified or the time zone is specified in the original time object:
 
pd_tz_df = pd.DataFrame({
                'Timestamp': pd.date_range('2017-01-01 00:00:00', periods=10, freq='s',tz='UTC')
                })
 
 
 
with localconverter(ro.default_converter + pandas2ri.converter):
      r_from_pd_tz_df = ro.conversion.py2rpy(pd_tz_df)
print(r_from_pd_tz_df)
                Timestamp
    0 2017-01-01 00:00:00
    1 2017-01-01 00:00:01
    2 2017-01-01 00:00:02
    3 2017-01-01 00:00:03
    4 2017-01-01 00:00:04
    5 2017-01-01 00:00:05
    6 2017-01-01 00:00:06
    7 2017-01-01 00:00:07
    8 2017-01-01 00:00:08
    9 2017-01-01 00:00:09

10. numpy to R

snippet.python
# 1. From rpy2 to numpy
# R vectors or arrays can be converted to numpy arrays using numpy.array() or numpy.asarray().
import numpy as np
ltr = robjects.r.letters
ltr_np = np.asarray(ltr)
print(ltr_np)
 
# 2. From numpy to rpy2
# The activation (and deactivation) of the automatic conversion of numpy objects into rpy2 objects can be made with:
from rpy2.robjects import numpy2ri
numpy2ri.activate()
numpy2ri.deactivate()