@@ -35,7 +35,7 @@ General terminology translation
35
35
~~~~~~~~~~~~~
36
36
37
37
A ``DataFrame`` in pandas is analogous to an Excel worksheet. While an Excel workbook can contain
38
- multiple worksheets, pandas ``DataFrame``s exist independently.
38
+ multiple worksheets, pandas ``DataFrame``\ s exist independently.
39
39
40
40
``Series``
41
41
~~~~~~~~~~
@@ -75,11 +75,13 @@ This can be achieved by creating a series and assigning it to the desired cells.
75
75
76
76
.. ipython :: python
77
77
78
- df = pd.DataFrame({' AAA' : [1 ] * 8 , ' BBB' : list (range (0 , 8 ))}); df
78
+ df = pd.DataFrame({" AAA" : [1 ] * 8 , " BBB" : list (range (0 , 8 ))})
79
+ df
79
80
80
- series = list (range (1 , 5 )); series
81
+ series = list (range (1 , 5 ))
82
+ series
81
83
82
- df.iloc[2 :( 5 + 1 )].AAA = series
84
+ df.iloc[2 : ( 5 + 1 )].AAA = series
83
85
84
86
df
85
87
@@ -106,7 +108,13 @@ pandas.
106
108
107
109
.. ipython :: python
108
110
109
- df = pd.DataFrame({" class" : [' A' , ' A' , ' A' , ' B' , ' C' , ' D' ], " student_count" : [42 , 35 , 42 , 50 , 47 , 45 ], " all_pass" : [" Yes" , " Yes" , " Yes" , " No" , " No" , " Yes" ]})
111
+ df = pd.DataFrame(
112
+ {
113
+ " class" : [" A" , " A" , " A" , " B" , " C" , " D" ],
114
+ " student_count" : [42 , 35 , 42 , 50 , 47 , 45 ],
115
+ " all_pass" : [" Yes" , " Yes" , " Yes" , " No" , " No" , " Yes" ],
116
+ }
117
+ )
110
118
111
119
df.drop_duplicates()
112
120
@@ -128,12 +136,16 @@ each class.
128
136
129
137
.. ipython :: python
130
138
131
- df[" girls_count" ] = [21 , 12 , 21 , 31 , 23 , 17 ]; df
139
+ df[" girls_count" ] = [21 , 12 , 21 , 31 , 23 , 17 ]
140
+ df
141
+
132
142
133
143
def get_count (row ):
134
144
return row[" student_count" ] - row[" girls_count" ]
135
145
136
- df[" boys_count" ] = df.apply(get_count, axis = 1 ); df
146
+
147
+ df[" boys_count" ] = df.apply(get_count, axis = 1 )
148
+ df
137
149
138
150
139
151
VLOOKUP
@@ -143,16 +155,43 @@ VLOOKUP
143
155
144
156
import random
145
157
146
- df1 = pd.DataFrame({" keys" : [1 , 2 , 3 , 4 , 5 , 6 , 7 ], " first_names" : [" harry" , " ron" ,
147
- " hermione" , " rubius" , " albus" , " severus" , " luna" ]}); df1
148
-
149
- random_names = pd.DataFrame({" surnames" : [" hadrid" , " malfoy" , " lovegood" ,
150
- " dumbledore" , " grindelwald" , " granger" , " weasly" , " riddle" , " longbottom" ,
151
- " snape" ], " keys" : [ random.randint(1 ,7 ) for x in range (0 ,10 ) ]})
158
+ df1 = pd.DataFrame(
159
+ {
160
+ " keys" : [1 , 2 , 3 , 4 , 5 , 6 , 7 ],
161
+ " first_names" : [
162
+ " harry" ,
163
+ " ron" ,
164
+ " hermione" ,
165
+ " rubius" ,
166
+ " albus" ,
167
+ " severus" ,
168
+ " luna" ,
169
+ ],
170
+ }
171
+ )
172
+ df1
173
+
174
+ random_names = pd.DataFrame(
175
+ {
176
+ " surnames" : [
177
+ " hadrid" ,
178
+ " malfoy" ,
179
+ " lovegood" ,
180
+ " dumbledore" ,
181
+ " grindelwald" ,
182
+ " granger" ,
183
+ " weasly" ,
184
+ " riddle" ,
185
+ " longbottom" ,
186
+ " snape" ,
187
+ ],
188
+ " keys" : [random.randint(1 , 7 ) for x in range (0 , 10 )],
189
+ }
190
+ )
152
191
153
192
random_names
154
193
155
- random_names.merge(df1, on = " keys" , how = ' left' )
194
+ random_names.merge(df1, on = " keys" , how = " left" )
156
195
157
196
Adding a row
158
197
~~~~~~~~~~~~
@@ -163,7 +202,8 @@ NOTE: If the index already exists, the values in that index will be over written
163
202
164
203
.. ipython :: python
165
204
166
- df1.iloc[7 ] = [8 , " tonks" ]; df1
205
+ df1.iloc[7 ] = [8 , " tonks" ]
206
+ df1
167
207
168
208
169
209
Search and Replace
0 commit comments