Data Types

    2024-10-17 (last edit: 2024-09-20)

    Aggregating data

    Below is a compact overview of Rust's structs:

    // A point on an integer grid; all fields are Copy, so Position can be Copy too
    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
    struct Position(i32, i32); // tuple struct

    // Could Hero derive the Copy trait?
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct Hero {
        name: String,
        level: u32,
        experience: u32,
        position: Position,
    }

    // we can add methods to structs using the 'impl' keyword
    impl Hero {
        // associated function (a "static method" in other languages): it has no 'self'
        // parameter and is called as 'Hero::new(...)'
        //
        // creates a level 1 hero with no experience, standing at the origin
        fn new(name: String) -> Hero {
            Hero {
                name,
                level: 1,
                experience: 0,
                position: Position(0, 0),
            }
        }
    }

    // multiple impl blocks are possible for one struct
    impl Hero {
        // instance method, first argument (self) is the calling instance
        //
        // returns the Manhattan distance between the hero and 'pos'
        // (the sum itself can still exceed u32::MAX for opposite corners of the grid)
        fn distance(&self, pos: Position) -> u32 {
            // fields of tuples and tuple structs can be accessed through 'tuple.i' (e.g. 'pos.0')
            //
            // 'abs_diff' computes the gap directly as a u32, so there is no intermediate
            // signed subtraction that could overflow for far-apart coordinates
            pos.0.abs_diff(self.position.0) + pos.1.abs_diff(self.position.1)
        }

        // mutable borrow of self allows to change instance fields
        fn level_up(&mut self) {
            self.experience = 0;
            self.level += 1;
        }

        // 'self' is not borrowed here and will be moved into the method,
        // so the caller cannot use the hero afterwards
        fn die(self) {
            println!(
                "Here lies {}, a hero who reached level {}. RIP.",
                self.name, self.level
            );
        }
    }
    
    // Walks through creating, copying, comparing, printing and consuming Hero values
    fn main() {
        let mut hero: Hero = Hero::new(String::from("Marty The Brave"));
        hero.level_up(); // 'self' is always passed implicitly

        // fields other than 'name' will be the same as in 'hero';
        // all of them are Copy, so 'hero' is not moved and stays usable
        let steve = Hero {
            name: String::from("Steve The Normal Guy"),
            ..hero
        };

        assert_eq!(hero.level, steve.level);

        let mut twin = hero.clone();

        // we can compare Hero objects because it derives the PartialEq trait
        assert_eq!(hero, twin);
        twin.level_up();
        assert_ne!(hero, twin);
        hero.level_up();
        assert_eq!(hero, twin);

        // we can print out the struct's debug string with '{:?}'
        println!("print to stdout: {:?}", hero);

        hero.die(); // 'hero' is not usable after this invocation, see the method's definition

        // the dbg! macro prints debug strings to stderr along with file and line number;
        // it takes expressions (not a format string) and we pass a reference
        // so that 'twin' is not moved into the macro
        dbg!(&twin);

        let pos = Position(42, 0);
        let dist = steve.distance(pos); // no clone here as Position derives the Copy trait
        println!("{:?}", pos);
        assert_eq!(dist, 42);
    }
    
    

    (Download the source code for this example: data_types.rs)

    Enums

    It is often the case that we want to define a variable that can only take a certain set of values and the values are known up front. In C you can use an enum for this.

    #include <stdio.h>
    
    /* The shirt sizes the program knows about. No explicit values are given,
       so the enumerators are the integer constants 0, 1, 2, 3 in this order. */
    enum shirt_size {
        small,
        medium,
        large,
        xlarge
    };
    
    /* Prints "my size is <name>" and a newline to stdout.
       A value that matches none of the enumerators is reported as "unknown". */
    void print_size(enum shirt_size size) {
        const char *label;

        if (size == small) {
            label = "small";
        } else if (size == medium) {
            label = "medium";
        } else if (size == large) {
            label = "large";
        } else if (size == xlarge) {
            label = "xlarge";
        } else {
            label = "unknown";
        }

        printf("my size is ");
        printf("%s", label);
        printf("\n");
    }
    
    int main() {
        enum shirt_size my_size = medium;
        print_size(my_size);
    }
    
    

    (Download the source code for this example: enums.c)

    However, in C enums are just integers. Nothing prevents us from writing:

    int main() {
        /* 666 is not one of the declared enumerators, yet C accepts the assignment */
        enum shirt_size my_size = 666;
        print_size(my_size); /* ends up in the "unknown" default case */
    }
    

    C++ introduces enum classes which are type-safe. Legacy enums are also somewhat safer than in C; compiling the same code as above as C++ fails with:

    <source>:27:31: error: invalid conversion from 'int' to 'shirt_size' [-fpermissive]
       27 |     enum shirt_size my_size = 666;
          |                               ^~~
          |                               |
          |                               int
    

    Some programming languages (especially functional ones) allow programmers to define enums which carry additional information. Such types are usually called tagged unions or algebraic data types.

    In C++ we can use a union together with an enum tag to define such a type:

    #include <iostream>
    
    // Taken from: https://en.cppreference.com/w/cpp/language/union
    
    // S has one non-static data member (tag), three enumerator members (CHAR, INT, DOUBLE),
    // and three variant members (c, i, d)
    struct S
    {
        enum{CHAR, INT, DOUBLE} tag; // tells which union member is meant to be read
        union
        {
            char c;   // read by print_s when tag == CHAR
            int i;    // read by print_s when tag == INT
            double d; // read by print_s when tag == DOUBLE
        };
    };
    
    // Prints the union member selected by s.tag, followed by a newline.
    // A tag outside the three enumerators prints nothing.
    void print_s(const S& s)
    {
        if (s.tag == S::CHAR)
        {
            std::cout << s.c << '\n';
        }
        else if (s.tag == S::INT)
        {
            std::cout << s.i << '\n';
        }
        else if (s.tag == S::DOUBLE)
        {
            std::cout << s.d << '\n';
        }
    }
    
    int main()
    {
        S s = {S::CHAR, 'a'};
        print_s(s);
        s.tag = S::INT;
        s.i = 123;
        print_s(s);
    }
    
    

    (Download the source code for this example: tagged_union.cpp)

    C++17 introduced a new feature called variant which generalizes this concept. You can read more about it here.

    Java has a more or less analogous feature called sealed classes since version 17.

    Enums in Rust

    Let's see how they are defined in Rust.

    #![allow(unused_assignments)]
    #![allow(unused_variables)]
    #![allow(dead_code)]
    
    // Sizes that are expressed as a word; a plain enum without any payload
    #[derive(Debug)]
    enum NamedSize {
        Small,
        Medium,
        Large,
        XL,
    }
    
    // A shirt size is either a word or a number; each variant carries its own data
    #[derive(Debug)]
    enum ShirtSize {
        Named(NamedSize), // wraps one of the NamedSize variants
        Numeric(u32),     // wraps a plain number
    }
    
    // Builds one value of each ShirtSize variant and prints their Debug representations
    fn main() {
        let adjective = ShirtSize::Named(NamedSize::Small);
        let number = ShirtSize::Numeric(42);

        println!(
            "Isn't it strange that some clothes' sizes are adjectives like {:?},",
            adjective
        );
        println!("but sometimes they are numbers like {:?}?", number);
    }
    
    

    (Download the source code for this example: enums.rs)

    In Rust, enums are a core feature of the language. You may have heard that one of Rust's defining characteristics is the absence of "the billion dollar mistake". So what can we do to say that a value is missing if there is no null?

    In Rust, we can use the Option type to represent the absence of a value.

    Option is defined as:

    enum Option<T> {
        Some(T), // a value of type T is present
        None,    // there is no value
    }
    

    The <T> part is called the "type parameter" and it causes Option to be generic. We won't go deeper into this for now.

    The fact that variables which could be null in other languages have a different type in Rust is the solution to the billion dollar mistake!

    #![allow(unused_assignments)]
    #![allow(unused_variables)]
    #![allow(dead_code)]
    
    // A guided tour of Option<T>: construction, checked operations, combinators,
    // and the `take` / `replace` helpers. Every claim is backed by an assertion.
    fn main() {
        let mut not_null: i32 = 42;
        not_null = 43;
        // not_null = None; // this won't compile because it's a different type!

        let mut nullable: Option<i32> = Some(42);
        nullable = None;
        nullable = Some(43);

        // such construction is rare, but possible
        let mut double_nullable: Option<Option<i32>> = Some(Some(42));
        // assert_ne!(double_nullable, Some(42)); // this won't even compile because it's a different type!
        double_nullable = None;
        double_nullable = Some(None);

        // None and Some(None) are different!
        assert_ne!(double_nullable, None);

        // Now recall that division by 0 *panics*
        // A panic is an unrecoverable error
        // It is not an exception!
        // And in Rust there are no exceptions, so there are no try/catch blocks
        // Now let's imagine that we want to divide one number by another
        fn divide(dividend: i32, divisor: i32) -> i32 {
            dividend / divisor
        }

        // We get the divisor from the user, so it can be 0
        // We want to handle this situation gracefully - we don't want to crash the program!
        // We can do this by using the Option<T> type
        fn safe_divide(dividend: i32, divisor: i32) -> Option<i32> {
            // Division by zero is not the only way to panic:
            // i32::MIN / -1 would be 2^31, which does not fit in an i32
            if divisor == 0 || (dividend == i32::MIN && divisor == -1) {
                None
            } else {
                Some(dividend / divisor)
            }
        }
        assert_eq!(safe_divide(42, 2), Some(21));
        assert_eq!(safe_divide(42, 0), None);
        assert_eq!(safe_divide(i32::MIN, -1), None);

        // Fortunately, such a function is already included in the standard library
        let number: i32 = 42;
        // We need to specify the type explicitly
        // because checked_div is implemented for all integer types
        // and Rust won't know which type we want to use
        assert_eq!(number.checked_div(2), Some(21));
        assert_eq!(number.checked_div(0), None);

        // Now let's imagine we search for a value in an array.
        let numbers = [1, 2, 3, 4, 5];
        let three = numbers.iter().copied().find(|&x| x == 3);
        assert_eq!(three, Some(3));
        let seven = numbers.iter().copied().find(|&x| x == 7);
        assert_eq!(seven, None);
        // We won't delve deeper into the details of how iterators work for now,
        // but the key takeaway is that there are no sentinel or special values like `nullptr` in Rust

        // Usually there are two kinds of methods:
        // ones that will panic if the argument is incorrect,
        // numbers[8]; // this will panic!
        // and `checked` ones that return an Option
        assert_eq!(numbers.get(8), None);

        // We can use `unwrap` to get the value out of an Option
        // but we must be absolutely sure that the Option is Some, otherwise we'll get a panic
        // numbers.get(8).unwrap(); // this will panic!
        assert_eq!(numbers.get(8).copied().unwrap_or(0), 0); // or we can provide a default value

        // Usually instead of unwrapping we use pattern matching, we'll get to this in a minute
        // but first let's see what else we can do with an option
        let number: Option<i32> = Some(42);
        // We can use `map` to transform the value inside an Option
        let doubled = number.map(|x| x * 2);
        assert_eq!(doubled, Some(84));
        // We can use flatten to reduce one level of nesting
        let nested = Some(Some(42));
        assert_eq!(nested.flatten(), Some(42));
        // We can use `and_then` to chain multiple options
        // This operation is called `flatmap` in some languages
        let chained = number
            .and_then(|x| x.checked_div(0))
            .and_then(|x| x.checked_div(2));
        assert_eq!(chained, None);

        // The last two things we'll cover here are `take` and `replace`
        // They are important when dealing with non-Copy types
        // `take` will return the value inside an Option and leave a None in its place
        let mut option: Option<i32> = None;
        // Again, we need to specify the type
        // Even though we want to say that there is no value inside the Option,
        // this absent value must have a concrete type!
        assert_eq!(option.take(), None);
        assert_eq!(option, None);

        let mut x = Some(2);
        let y = x.take();
        assert_eq!(x, None);
        assert_eq!(y, Some(2));

        // `replace` can be used to swap the value inside an Option
        let mut x = Some(2);
        let old = x.replace(5);
        assert_eq!(x, Some(5));
        assert_eq!(old, Some(2));

        let mut x = None;
        let old = x.replace(3);
        assert_eq!(x, Some(3));
        assert_eq!(old, None);
    }
    
    

    (Download the source code for this example: option.rs)

    Pattern matching

    Pattern matching is a powerful feature of Rust and many functional languages, but it's slowly making its way into imperative languages like Java and Python as well.

    #![allow(dead_code)]
    #![allow(unused_variables)]
    
    // A tour of pattern matching: literals, wildcards, tuples, struct destructuring,
    // ranges, `@` bindings, guards, references and `ref` / `ref mut` bindings.
    fn main() {
        // Pattern matching is basically a switch on steroids.
        let number = rand::random::<i32>();
        match number % 7 {
            0 => println!("{number} is divisible by 7"),
            1 => println!("{number} is *almost* divisible by 7"),
            _ => println!("{number} is not divisible by 7"),
        }

        #[derive(Debug)]
        enum Color {
            Pink,
            Brown,
            Lime,
        }

        let color = Color::Lime;
        match color {
            Color::Pink => println!("My favorite color!"),
            // _ is a wildcard.
            // Rust will statically check that we covered all cases or included a default case.
            _ => println!("Not my favorite color!"),
        }

        // We can also use pattern matching to match on multiple values.
        match (color, number % 7) {
            (Color::Pink, 0) => println!("My favorite color and number!"),
            (Color::Pink, _) => println!("My favorite color!"),
            (_, 0) => println!("My favorite number!"),
            (_, _) => println!("Not my favorite color or number!"),
        }
        // (This is not special syntax, we're just pattern matching tuples.)

        // But we can also *destructure* the value
        struct Human {
            age: u8,
            favorite_color: Color,
        }

        let john = Human {
            age: 42,
            favorite_color: Color::Pink,
        };

        match &john {
            Human {
                age: 42,
                favorite_color: Color::Pink,
            } => println!("Okay, that's John!"),
            Human {
                favorite_color: Color::Pink,
                ..
            } => println!("Not John, but still his favorite color!"),
            _ => println!("Somebody else?"),
        }

        // Note two things:
        // 1. Color does *not* implement PartialEq, so we can't use == to compare it,
        //    but pattern matching is fine.
        // 2. We *borrowed* the value, so we can use it after the match.

        println!("John is {} years old and still kicking!", john.age);

        // To save some time, we can use `if let` to match against only one thing
        // We could also use `while let ... {}` in the same way
        if let Color::Pink = &john.favorite_color {
            println!("He's also a man of great taste");
        }

        // We can match ranges...
        match john.age {
            0..=12 => println!("John is a kid!"),
            13..=19 => println!("John is a teenager!"),
            20..=29 => println!("John is a young adult!"),
            30..=49 => println!("John is an adult!"),
            50..=69 => println!("John is mature!"),
            _ => println!("John is old!"),
        }

        // We can use match and capture the value at the same time.
        match john.age {
            age @ 0..=12 => println!("John is a kid, age {}", age),
            age @ 13..=19 => println!("John is a teenager, age {}", age),
            age @ 20..=29 => println!("John is a young adult, age {}", age),
            age @ 30..=49 => println!("John is an adult, age {}", age),
            age @ 50..=69 => println!("John is mature, age {}", age),
            age => println!("John is old, age {}", age),
        }

        // We can use guards to check for multiple conditions.
        // (The teenager range is 13..=19, the same as in the matches above.)
        match john.age {
            age @ 13..=19 if age % 2 == 1 => println!("John is an *odd* teenager, age {}", age),
            age if age % 2 == 0 => println!("John is an *even* man, age {}", age),
            _ => println!("John is normal"),
        }

        // Finally, let's look at some references now
        let reference: &i32 = &4;

        match reference {
            &val => println!("Value under reference is: {}", val),
        }

        // `ref` can be used to create a reference when destructuring
        let Human {
            age,
            ref favorite_color,
        } = john;
        // `john` is still valid, because we borrowed using `ref`
        // (and `age` is a `u8`, which is simply copied)
        if let Color::Pink = &john.favorite_color {
            println!("John still has his color - {:?}!", favorite_color);
        }

        let mut john = john;

        // `ref mut` borrows mutably
        let Human {
            age,
            ref mut favorite_color,
        } = john;
        // We use `*` to dereference
        *favorite_color = Color::Brown;
        println!(
            "Tastes do change with time and John likes {:?} now.",
            john.favorite_color
        );
    }
    
    

    (Download the source code for this example: pattern_matching.rs)

    Result

    We said there are no exceptions in Rust and panics mean errors which cannot be caught. So how do we handle situations which can fail? That's where the Result type comes in.

    #![allow(dead_code)]
    #![allow(unused_variables)]
    
    use std::fs::File;
    use std::io;
    use std::io::Read;
    
    // Let's try reading from a file.
    // Obviously this can fail.
    // Reads 128 bytes from /dev/random and returns them as a (lossily decoded) String.
    // Every fallible step is handled with an explicit `match` to show how verbose that gets.
    fn first_try() -> io::Result<String> {
        let file = File::open("/dev/random");
        match file {
            Ok(mut file) => {
                // We got a file!
                let mut buffer = vec![0; 128];
                // Matching each result quickly becomes tedious...
                match file.read_exact(&mut buffer) {
                    Ok(_) => {
                        // Bytes that are not valid UTF-8 are replaced instead of causing an error
                        let gibberish = String::from_utf8_lossy(&buffer);
                        Ok(gibberish.to_string())
                    }
                    Err(error) => Err(error),
                }
            }
            Err(error) => {
                Err(error) // This is needed in order to change the type from `io::Result<File>` to `io::Result<String>`
            }
        }
    }
    
    // The '?' operator allows us to return early in case of an error
    // (it automatically converts the error type)
    // Reads the first 128 bytes of `filename` and returns them as a String,
    // replacing anything that is not valid UTF-8.
    //
    // Returns an error if the file cannot be opened or holds fewer than 128 bytes.
    // `filename` is a plain `&str`: a `'static` lifetime is not needed, and requiring it
    // would reject names that are built at runtime.
    fn second_try(filename: &str) -> io::Result<String> {
        let mut file = File::open(filename)?;
        // The size is fixed, so a stack array does the job without a heap allocation
        let mut buffer = [0u8; 128];
        file.read_exact(&mut buffer)?;
        let gibberish = String::from_utf8_lossy(&buffer);
        Ok(gibberish.into_owned())
    }
    
    // Tries to read from several paths and prints either the bytes read or the error
    fn main() {
        let candidates = [
            "/dev/random",
            "/dev/null",
            "/dev/cpu",
            "/dev/fuse",
            "there_certainly_is_no_such_file",
        ];

        for path in candidates.iter().copied() {
            println!("Trying to read from '{}'", path);

            let outcome = second_try(path);
            match outcome {
                Err(error) => println!("Error: {}", error),
                Ok(gibberish) => println!("{}", gibberish),
            }
        }
    }
    
    

    (Download the source code for this example: result.rs)

    Obligatory reading