Chapter 2 R基础知识

2.1 编程基础语法

以下代码用于展示r的数据操作、基本数据结构和常用的函数。

x = seq(1, 10, by=0.1)  # 生产一组数列,间隔为0.1
y = 2 * sin( x )      # 包含 Sin 函数
plot(x, y)      # 绘制出(x,y)的曲线。

plot(x=x, y=y, 
     xlab='x value', ylab = bquote(2 * sin( x ) ), col=2,
     type='o')
lines(x=x, y=abs(y), col='blue')
mtext(side = 3, text = 'Sin(x)', cex=2, line=-2)
grid()

length(x)
## [1] 91
mat.v = cbind(x, y)
mat.h = rbind(x, y)

boxplot(mat.v)

class(mat.v)
## [1] "matrix" "array"
dim(mat.v)
## [1] 91  2
dim(mat.h)
## [1]  2 91
nrow(mat.v)
## [1] 91
ncol(mat.v)
## [1] 2

2.1.1 函数定义

文件1Exercise/Functions/fun.add.R内容:

fun.add <- function(a, b){
  return (a + b)
}

文件2add.R内容:

source('Exercise/Functions/fun.add.R')
fun.add(2, 3)
## [1] 5

2.1.2 文件管理

2.2 数据类型与结构

  1. 数字:numeric 即一维数组, 常用的函数length, class等。
x0 = rnorm(100)
plot(x0)

  1. 数据框:data.frame
x=readRDS('Exercise/Data_RDS/JCR.RDS')
length(x)
## [1] 6
dim(x)
## [1] 14492     6
head(x) #前5组元素
##          c1
## 1 Acoustics
## 2 Acoustics
## 3 Acoustics
## 4 Acoustics
## 5 Acoustics
## 6 Acoustics
##                                                                    name
## 1                                             Ultrasonics Sonochemistry
## 2                                 Ultrasound In Obstetrics & Gynecology
## 3                                            Ultraschall In Der Medizin
## 4         Ieee-acm Transactions On Audio Speech And Language Processing
## 5                                        Journal Of Sound And Vibration
## 6 Ieee Transactions On Ultrasonics Ferroelectrics And Frequency Control
##        ISSN cites    IF CR
## 1 1350-4177 17314 7.279  1
## 2 0960-7692 12336 5.595  1
## 3 0172-4614  2238 4.613  3
## 4 2329-9290  3110 3.531  2
## 5 0022-460X 36167 3.123  1
## 6 0885-3010 11266 2.989  1
View(x)
  1. 字符:character
levels(x$name)[1:5]
## [1] ""                                              
## [2] "2d Materials"                                  
## [3] "3 Biotech"                                     
## [4] "3d Printing And Additive Manufacturing"        
## [5] "4or-a Quarterly Journal Of Operations Research"
  1. 向量:vector
vec = cbind(1:10)
plot(vec)

1. 矩阵:matrix

x.mat = as.matrix(x)
class(x.mat)
## [1] "matrix" "array"
length(x.mat)
## [1] 86952
  1. 因子:factor
library(ggplot2)
head(levels(x$c1))
## [1] "Acoustics"                           "Agricultural Economics & Policy"    
## [3] "Agricultural Engineering"            "Agriculture, Dairy & Animal Science"
## [5] "Agriculture, Multidisciplinary"      "Agronomy"
  1. 逻辑:logical
x = seq(-pi*2, pi * 2, by=0.01)
y = sin(x)
tf = (y > 0)
df = data.frame(x, y, tf)

plot(x, y, type = 'o', pch = 19, col = tf+1, xlab='X', ylab = 'Y = sin(x)')
abline(v = 0, col='red', lwd=4) #在x = 0处添加垂直线条,
abline(h = 0, col='blue', lwd=4) #在y = 0处添加水平线条,
grid()  # 添加坐标网格

2.3 可视化

par(mar = c(4, 4, .1, .1))  # 绘图的四边边界
x = seq(-pi*2, pi * 2, by=0.01)
y = sin(x)
plot(x, y, type = 'l', pch = 19, xlab='X', ylab = 'Y = sin(x)')
abline(v = 0, col='red', lwd=4) #在x = 0处添加垂直线条,
abline(h = 0, col='blue', lwd=4) #在y = 0处添加水平线条,
grid()  # 添加坐标网格
R绘图结果

(#fig:fig.v1)R绘图结果

n=1000
x = 1:n
y = abs(rnorm(n, mean=10, sd=3))
y.sort = sort(y)
par(mfrow=c(2,2))
plot(x, y.sort, log=''); grid()
plot(x, y.sort, log='x'); grid()
plot(x, y.sort, log='y'); grid()
plot(x, y.sort, log='xy'); grid()
绘图:排序的随机数

Figure 2.1: 绘图:排序的随机数

2.4 三维可视化

install.package('rgl')
library(rgl)
with(iris, plot3d(Sepal.Length, Sepal.Width, Petal.Length, 
                  type="s", col=as.numeric(Species)))