Intermediate Python¶

Texas A&M University High Performance Research Computing

Texas A&M University Engineering Studio for Advanced Instruction and Learning

Texas A&M Engineering Experiment Station Educating Generations

DataFrame Operations¶

Learn how to use the Python Pandas library for data manipulation

Pandas Module¶

Reminder: You should import a module once, usually at the beginning of the notebook.

Execute this cell to bring in pandas and numpy functions.

In [1]:
#don't forget to
import pandas
import numpy

DataFrame Methods¶

DataFrames have many methods for interacting with data.

Reminder: some methods, known as return methods, return a new DataFrame object. They can be chained together.

new_dataframe = dataframe.function().function()...

Reminder: some methods, known as in-place methods, don't return anything. They modify the DataFrame and the same object gets the updates. They cannot be chained together.

dataframe.function()
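For example, many return methods such as rename() accept an inplace=True keyword that turns them into in-place methods. A quick sketch with a made-up two-row DataFrame:

```python
import pandas

df = pandas.DataFrame({"a": [1, 2]}, index=["x", "y"])

# return method: the original df is unchanged, a new object comes back
new_df = df.rename({"x": "X"})
print(list(df.index))      # ['x', 'y']
print(list(new_df.index))  # ['X', 'y']

# in-place method: returns None, df itself is modified
result = df.rename({"x": "X"}, inplace=True)
print(result)              # None
print(list(df.index))      # ['X', 'y']
```

Because in-place methods return None, trying to chain another method after one raises an AttributeError.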

Relabel a row or column¶

Use the df.rename() return method to replace old labels with new labels. You provide a dictionary {} of old and new labels in pairs.

new_df = df.rename({old:new, ...} )
new_df = df.rename({old:new, ...}, axis=1)

To specify whether the dictionary keys are row or column labels, use the keyword argument axis. The axis value can be

  • 0 meaning 'rows'
  • 1 meaning 'columns'

It is 0 by default.

Example 1¶

Using the rename() method twice in a chain.

.rename({"row1":"ROW1"})
.rename({"column1":"COLUMN1"}, axis=1)

Execute the cell to see what happens.

In [2]:
# a quick 3x4 array
data_2d = numpy.arange(12).reshape(3, 4)

df_from_array = pandas.DataFrame(data=data_2d,
                                 index=["row1", "row2", "row3"],
                                 columns=["column1", "column2", "column3", "column4"]
                                 )

# relabeling twice
new_df = df_from_array.rename({"row1":"ROW1"}).rename({"column1":"COLUMN1"}, axis=1)

# display both
display(df_from_array)
print()
display(new_df)
column1 column2 column3 column4
row1 0 1 2 3
row2 4 5 6 7
row3 8 9 10 11

COLUMN1 column2 column3 column4
ROW1 0 1 2 3
row2 4 5 6 7
row3 8 9 10 11

Exercise 1¶

Relabel a different row and column of df_from_array. (Leave the original unchanged).

In [3]:
#your code here

Dropping Entries¶

We can drop either rows or columns in a DataFrame with the drop( ) return method. The argument is one or more labels; for example:

  • a single label
  • list of labels
new_df = df.drop( [label or index, ...] )

To specify whether the provided labels are row or column labels, use the keyword argument axis. The axis value can be

  • 0 meaning 'rows'
  • 1 meaning 'columns'

It is 0 by default.

new_df = df.drop( [label or index, ...], axis=0 )

Example 2¶

Execute the cells to see what happens.

In [4]:
# a quick 3x4 array
data_2d = numpy.arange(12).reshape(3, 4)

df_from_array = pandas.DataFrame(data=data_2d,
                                 index=["row1", "row2", "row3"],
                                 columns=["column1", "column2", "column3", "column4"]
                                 )

Drop a row

In [5]:
new_df = df_from_array.drop('row1', axis=0)
display(new_df)
column1 column2 column3 column4
row2 4 5 6 7
row3 8 9 10 11

Drop two columns

In [6]:
new_df = df_from_array.drop(['column1','column2'], axis=1)
display(new_df)
column3 column4
row1 2 3
row2 6 7
row3 10 11

Exercise 2¶

Create a dataframe (or borrow from above).

Select the last row by index (-1). Retrieve that row's label.

  • Hint: a row is a Series with a property Series.name that is equal to its row label.

Drop that row.

Display the result.

Can you do it all on one line?

In [7]:
#your code here

Click here to see solution

display(df_from_array.drop(df_from_array.iloc[-1].name))

Sorting¶

The DataFrame return methods sort_index() and sort_values() return a new dataframe that is in a different order.

Sorting Labels¶

sort_index() can sort the index; it can also sort the columns.

  • (a default integer index is already sorted, so this is most useful with custom labels).
new_df = df.sort_index()

To specify whether to sort the index or the columns, use the keyword argument axis. The axis value can be

  • 0 meaning 'index'
  • 1 meaning 'columns'

It is 0 by default.

new_df = df.sort_index(axis=1)

To sort the index or the columns forwards or backwards, use the keyword argument ascending. The ascending value can be

  • True meaning sort ascending
  • False meaning sort descending
new_df = df.sort_index(ascending=False)

Example 3¶

Read and execute the cells to see what happens.

Hint: The columns will be in a different order.

In [8]:
temp_dictionary = {
    "Date": ["2023-05-01", "2023-05-02", "2023-05-03", "2023-05-04", "2023-05-05"],
    "Latitude": [34.05, 37.77, 40.71, 34.05, 35.68],
    "Longitude": [-118.25, -122.42, -74.01, -118.25, 139.69],
    "Magnitude": [4.5, 3.8, 5.2, 4.0, 6.1],
    "Depth (km)": [10.0, 12.5, 8.0, 15.0, 20.0],
    "Region": ["City1", "City2", "City3", "City4", "City5"]
}
df_from_lists = pandas.DataFrame(
    temp_dictionary,
    index = ['r1', 'r2', 'r3', 'r4', 'r5']
)
display(df_from_lists)
Date Latitude Longitude Magnitude Depth (km) Region
r1 2023-05-01 34.05 -118.25 4.5 10.0 City1
r2 2023-05-02 37.77 -122.42 3.8 12.5 City2
r3 2023-05-03 40.71 -74.01 5.2 8.0 City3
r4 2023-05-04 34.05 -118.25 4.0 15.0 City4
r5 2023-05-05 35.68 139.69 6.1 20.0 City5
In [9]:
display(df_from_lists.sort_index(axis=1) )
Date Depth (km) Latitude Longitude Magnitude Region
r1 2023-05-01 10.0 34.05 -118.25 4.5 City1
r2 2023-05-02 12.5 37.77 -122.42 3.8 City2
r3 2023-05-03 8.0 40.71 -74.01 5.2 City3
r4 2023-05-04 15.0 34.05 -118.25 4.0 City4
r5 2023-05-05 20.0 35.68 139.69 6.1 City5

Exercise 3¶

Sort the DataFrame df_from_lists from Example 3 by its row labels in descending order.

Display the result.

In [10]:
#your code here

Click here to see solution

display(df_from_lists.sort_index(ascending=False))

Sorting Values¶

sort_values() can sort the rows or the columns based on what is in them.

Specify the row or column whose values determine the order using the required keyword argument by. Provide a row or column label.

new_df = df.sort_values( by=label or index )

To specify whether the provided label is a row or column, use the keyword argument axis. The axis value can be

  • 0 meaning sort the 'rows' by a provided 'column'
  • 1 meaning sort the 'columns' by a provided 'row'

It is 0 by default.

new_df = df.sort_values( by=label, axis=1 )

To sort the values forwards or backwards, use the keyword argument ascending. The ascending value can be

  • True meaning sort ascending
  • False meaning sort descending
new_df = df.sort_values( by=label, ascending=False )
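The worked example below only sorts rows, so here is a small sketch of axis=1 with a made-up DataFrame: the columns are reordered so that the values in row 'r1' are ascending.

```python
import pandas

df = pandas.DataFrame(
    {"c1": [3, 9], "c2": [1, 8], "c3": [2, 7]},
    index=["r1", "r2"],
)

# reorder the columns by the values in row 'r1':
# r1 holds c2=1, c3=2, c1=3, so the columns come back as c2, c3, c1
print(df.sort_values(by="r1", axis=1))
```

The other rows (here 'r2') follow along: their values move with their columns.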

Sorting with Sub-sorting¶

If you wish to sort by multiple rows or columns, with each later one breaking ties in the previous sort (i.e. sub-sorting), provide a list of row or column labels to sort by, in order.

new_df = df.sort_values( by=[label or index, ... ])

If you are sorting multiple rows or columns and you want some of the sorts to be ascending but not others, you can provide a boolean list.

new_df = df.sort_values( by=[label, ...], ascending=[False, ...] )

Example 4¶

Read and execute the cells to see what happens.

Hint: The rows will be in a different order.

In [11]:
temp_dictionary = {
    "Date": ["2023-05-01", "2023-05-02", "2023-05-03", "2023-05-04", "2023-05-05"],
    "Latitude": [34.05, 37.77, 40.71, 34.05, 35.68],
    "Longitude": [-118.25, -122.42, -74.01, -118.25, 139.69],
    "Magnitude": [4.5, 3.8, 5.2, 4.0, 6.1],
    "Depth (km)": [10.0, 12.5, 8.0, 15.0, 20.0],
    "Region": ["City1", "City2", "City3", "City4", "City5"]
}
df_from_lists = pandas.DataFrame(
    temp_dictionary,
    index = ['r1', 'r2', 'r3', 'r4', 'r5']
)
display(df_from_lists)
Date Latitude Longitude Magnitude Depth (km) Region
r1 2023-05-01 34.05 -118.25 4.5 10.0 City1
r2 2023-05-02 37.77 -122.42 3.8 12.5 City2
r3 2023-05-03 40.71 -74.01 5.2 8.0 City3
r4 2023-05-04 34.05 -118.25 4.0 15.0 City4
r5 2023-05-05 35.68 139.69 6.1 20.0 City5
In [12]:
display(df_from_lists.sort_values(by='Magnitude'))
Date Latitude Longitude Magnitude Depth (km) Region
r2 2023-05-02 37.77 -122.42 3.8 12.5 City2
r4 2023-05-04 34.05 -118.25 4.0 15.0 City4
r1 2023-05-01 34.05 -118.25 4.5 10.0 City1
r3 2023-05-03 40.71 -74.01 5.2 8.0 City3
r5 2023-05-05 35.68 139.69 6.1 20.0 City5

Exercise 4¶

Sort the DataFrame df_from_lists from Example 4 with two sort criteria.

  • Column 'Magnitude' as the primary sort (descending)
  • Column 'Depth (km)' as the tie-breaker (ascending).

Display the result.

In [13]:
#your code here

Click here to see solution

display(df_from_lists.sort_values(by=['Magnitude','Depth (km)'], ascending=[False, True]))

Reducing Methods¶

A reducing method takes an array of values and returns a single number. Examples:

Arithmetic:

  • sum()
  • prod()

Statistics:

  • min()
  • max()
  • mean()
  • median()
  • var()
  • std()

Logic:

  • any()
  • all()

Reduce One Column¶

Series objects provide these reducing methods. They return a single value and usually take no arguments. If you only want to operate on a single column, this is the way to go.

value = df[column].function()
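For instance, the logic reductions any() and all() are usually applied to a boolean Series produced by a comparison. A small sketch with made-up magnitudes:

```python
import pandas

magnitudes = pandas.Series([4.5, 3.8, 5.2], index=["r1", "r2", "r3"])

print(magnitudes.max())          # 5.2
print((magnitudes > 5.0).any())  # True: at least one value exceeds 5.0
print((magnitudes > 5.0).all())  # False: not every value exceeds 5.0
```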

Reduce All Columns¶

DataFrame objects also provide these reducing methods. They reduce each column (or each row) to a single value. The return is a Series: its labels are the column labels when reducing each column, or the row labels when reducing each row.

Specify the direction of the reduction using the axis argument.

Example:

  • df.function(axis=0) reduces each column (default)
  • df.function(axis=1) reduces each row

Tip: if some columns contain non-numeric values, use numeric_only=True to skip them.
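A quick sketch of the two directions, using a made-up two-column DataFrame:

```python
import pandas

df = pandas.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})

# axis=0 (the default): reduce down each column -> one value per column
print(df.sum())        # a: 6, b: 60

# axis=1: reduce across each row -> one value per row
print(df.sum(axis=1))  # 11, 22, 33
```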

Example 5¶

Using reducing function mean().

Execute the cells to see what happens.

In [14]:
temp_dictionary = {
    'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],
    'year' : [2012, 2013, 2014, 2014, 2015],
    'pop' : [5.0, 5.1, 5.2, 4.0, 4.1],
    'unempl' : [5.8, 5.8, 5.8, 4.9, 4.9]
}
df_from_lists = pandas.DataFrame(
    temp_dictionary,
    columns=['year', 'state', 'pop', 'unempl'],
    index = ['r1', 'r2', 'r3', 'r4', 'r5']
)
In [15]:
print(df_from_lists['pop'].mean() )
4.68
In [16]:
print(df_from_lists.mean(numeric_only=True) )
year      2013.60
pop          4.68
unempl       5.44
dtype: float64

Exercise 5¶

Create a DataFrame with two columns of random numeric values.

  • (50 or more values for good results)

Compute the mean and standard deviation of both columns.

  • df.mean()
  • df.std().

Filter the DataFrame by whether the second column is greater than the first.

Compute the mean and standard deviation of both columns again (after filtering).

In [17]:
#your code here

Click here to see solution

random_array = numpy.random.rand(50,2)
random_df = pandas.DataFrame(random_array)
print("     overall")
print()
print("mean")
print(random_df.mean() )
print()
print("std")
print(random_df.std() )
print()
print("     column1 > column0")
print()
mask=random_df[1]>random_df[0]
print("mean")
print(random_df[mask].mean())
print()
print("std")
print(random_df[mask].std())

Overall:

  • The mean is about 0.5 for both columns.
  • The std is about 0.3 for both columns.

After filtering:

  • The mean of the first column decreases to about 0.33
  • The mean of the second column increases to about 0.66
  • The std decreases to about 0.24 for both columns.

Data Grouping¶

Suppose a DataFrame contains a column whose entries are categories.

Category other data
row1 type A 1.000
row2 type B 2.000
row3 type A 3.000
row4 type B 4.000

We may wish to perform operations separately on each category.

  1. Split by category
  2. Operate separately
  3. Recombine

Grouped DataFrames¶

The DataFrame method groupby() can help with this procedure.

  • The argument is a column label (or list thereof)
  • The return is a DataFrameGroupBy object
grouped_df = df.groupby(column)

A DataFrameGroupBy object is like a container for multiple sub-DataFrames.

group label sub-DataFrame
group 1 DataFrame 1
group 2 DataFrame 2
  • The group labels are the unique values found in the groupby argument column.
  • The sub-DataFrames are filtered by the corresponding value.

The DataFrameGroupBy object stores this information in a property .groups, which is a dictionary.

  • grouped_df.groups

You can retrieve a sub-DataFrame using the get_group() method.

sub_df = grouped_df.get_group( group_label )

You can iterate through all the sub-DataFrames; each iteration yields a (group label, sub-DataFrame) pair, much like iterating over a dictionary's items.

for group_label, sub_df in grouped_df:

Example 6¶

Using groupby with the example DataFrame.

Execute the cells to see what happens.

In [18]:
temp_dictionary = {
    'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],
    'year' : [2012, 2013, 2014, 2014, 2015],
    'pop' : [5.0, 5.1, 5.2, 4.0, 4.1],
    'unempl' : [5.8, 5.8, 5.8, 4.9, 4.9]
}
df_from_lists = pandas.DataFrame(
    temp_dictionary,
    columns=['year', 'state', 'pop', 'unempl'],
    index = ['r1', 'r2', 'r3', 'r4', 'r5']
)
display(df_from_lists)
year state pop unempl
r1 2012 VA 5.0 5.8
r2 2013 VA 5.1 5.8
r3 2014 VA 5.2 5.8
r4 2014 MD 4.0 4.9
r5 2015 MD 4.1 4.9
In [19]:
grouped_df = df_from_lists.groupby("state")
print(grouped_df.groups)
{'MD': ['r4', 'r5'], 'VA': ['r1', 'r2', 'r3']}
In [20]:
sub_df = grouped_df.get_group('MD')
display(sub_df)
year state pop unempl
r4 2014 MD 4.0 4.9
r5 2015 MD 4.1 4.9
In [21]:
for group_label, sub_df in grouped_df:
  print(group_label)
  display(sub_df)
  print()
MD
year state pop unempl
r4 2014 MD 4.0 4.9
r5 2015 MD 4.1 4.9
VA
year state pop unempl
r1 2012 VA 5.0 5.8
r2 2013 VA 5.1 5.8
r3 2014 VA 5.2 5.8

Exercise 6¶

Use the DataFrame df_from_lists from Example 6,

  • Group the DataFrame by 'year'.
  • Iterate through the groups and display each sub-DataFrame.
  • Using each group label, retrieve (and print) that group's row labels from the grouped_df.groups dictionary.
In [22]:
#your code here

Click here to see solution

grouped_df = df_from_lists.groupby("year")
for group_label, sub_df in grouped_df:
  print(group_label)
  display(sub_df)
  print()
  print(grouped_df.groups[group_label])
  print()

Grouped DataFrame Operations¶

DataFrameGroupBy has the same columns as the DataFrame, and a column can be selected using square-bracket notation. The result is also a grouped object (a SeriesGroupBy when a single column is selected).

  • grouped_df[ column ]

DataFrameGroupBy has many of the same reducing methods as a DataFrame object. However, they return DataFrames instead of Series.

reduced_df = grouped_df.reducing_method()

The index of the reduced DataFrame is made up of the group labels.
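A small sketch combining the two ideas, with a made-up state/pop DataFrame similar to the examples below: selecting one column of the grouped object and reducing it yields a Series indexed by group label.

```python
import pandas

df = pandas.DataFrame({
    "state": ["VA", "VA", "MD"],
    "pop": [5.0, 5.1, 4.0],
})

grouped_df = df.groupby("state")

# select one column, then reduce: a Series indexed by group label
print(grouped_df["pop"].mean())
# MD    4.00
# VA    5.05
```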

Aggregate Method¶

DataFrameGroupBy has a method named agg() (short for aggregate) that helps with doing multiple reducing operations to multiple columns.

reduced_df = grouped_df.agg( )

The basic argument of agg() is a list of functions or strings. The strings must match the name of a reducing method.

reduced_df = grouped_df.agg( [...] )

Extend the argument of agg() to a dictionary if you want to specify different functions for each column (by label).

reduced_df = grouped_df.agg( {
  column label : [...],
  column label : [...],
  ...
} )

The agg() method will create new columns as necessary to hold all the reductions.

Example 7¶

Computing the mean of each column, grouped by state.

  • Using grouped_df's reducing method mean()
  • Using grouped_df.agg()

Execute the cells to see what happens.

In [23]:
temp_dictionary = {
    'state' : ['VA', 'VA', 'VA', 'MD', 'MD'],
    'year' : [2012, 2013, 2014, 2014, 2015],
    'pop' : [5.0, 5.1, 5.2, 4.0, 4.1],
    'unempl' : [5.8, 5.8, 5.8, 4.9, 4.9]
}
df_from_lists = pandas.DataFrame(
    temp_dictionary,
    columns=['year', 'state', 'pop', 'unempl'],
    index = ['r1', 'r2', 'r3', 'r4', 'r5']
)
grouped_df = df_from_lists.groupby("state")
In [24]:
reduced_df = grouped_df.mean()
display(reduced_df)
year pop unempl
state
MD 2014.5 4.05 4.9
VA 2013.0 5.10 5.8
In [25]:
reduced_df = grouped_df.agg('mean')
display(reduced_df)
year pop unempl
state
MD 2014.5 4.05 4.9
VA 2013.0 5.10 5.8

Exercise 7¶

Starting with the provided national_economics DataFrame, perform the following tasks.

  • Set the index to be the 'Country' column.

  • Create a new column named 'GDP Per Capita', calculated as the ratio of 'GDP (Billions)' to 'Population (Millions)'.

    • (Don't forget to handle the units.)
  • Group the dataframe by 'Continent'.

  • Using the agg() function, perform the following reductions:

    • sum of 'GDP (Billions)'
    • sum of 'Population (Millions)'
    • mean, min, and max of 'GDP Per Capita'
In [26]:
national_economics = pandas.DataFrame({
    'Country': ['United States','China', 'Japan', 'Germany', 'India', 'United Kingdom', 'France', 'Italy', 'Brazil', 'Canada'],
    'Continent': ['America', 'Asia', 'Asia', 'Europe', 'Asia','Europe', 'Europe','Europe', 'America', 'America'],
    'GDP (Billions)': [18624.5, 11218.3, 4936.2, 3477.8, 2259.6, 2647.9, 2465.5, 1858.9, 1795.9, 1529.8],
    'Population (Millions)': [332.9, 1444.2, 126.1, 83.9, 1393.4, 68.2, 65.4, 60.4, 214.0, 38.1],
})

Click here to see solution

# inserting .set_index() here as a method chain
national_economics = pandas.DataFrame({
    'Country': ['United States','China', 'Japan', 'Germany', 'India', 'United Kingdom', 'France', 'Italy', 'Brazil', 'Canada'],
    'Continent': ['America', 'Asia', 'Asia', 'Europe', 'Asia','Europe', 'Europe','Europe', 'America', 'America'],
    'GDP (Billions)': [18624.5, 11218.3, 4936.2, 3477.8, 2259.6, 2647.9, 2465.5, 1858.9, 1795.9, 1529.8],
    'Population (Millions)': [332.9, 1444.2, 126.1, 83.9, 1393.4, 68.2, 65.4, 60.4, 214.0, 38.1],
}).set_index('Country')

# converting from Billions and Millions by multiplication
national_economics['GDP Per Capita'] = (national_economics['GDP (Billions)'] * 1e9) / (national_economics['Population (Millions)'] * 1e6)

# inserting groupby and agg here as a method chain
display(
    national_economics.groupby('Continent').agg({
      'GDP (Billions)':'sum',
      'Population (Millions)':'sum',
      'GDP Per Capita':['mean','min','max']
    })
)