Published on

Design File System

Authors
  • avatar
    Name
    Gene Zhang
    Twitter

Key Concept: Trie/Tree for Hierarchical Paths - Use trie (prefix tree) or nested hash map to represent file system hierarchy. Each node represents a directory or file with its value.

Pattern: Hierarchical data structure design using trie or tree.

# Design a file system that allows you to create new paths and associate them with values.
#
# Implement the FileSystem class:
# - bool createPath(string path, int value): Creates a new path and associates
#   a value with it if possible. Returns true if successful, false otherwise.
#   A path is valid if:
#   1. The path doesn't already exist
#   2. The parent path exists (except for root paths like "/a")
# - int get(string path): Returns the value associated with path or -1 if not found.
#
# Example:
# Input: ["FileSystem", "createPath", "get"]
#        [[], ["/a", 1], ["/a"]]
# Output: [null, true, 1]
#
# Input: ["FileSystem", "createPath", "createPath", "get", "createPath", "get"]
#        [[], ["/leet", 1], ["/leet/code", 2], ["/leet/code"], ["/c/d", 1], ["/c"]]
# Output: [null, true, true, 2, false, -1]
# Explanation: "/c/d" can't be created because "/c" doesn't exist.
#
# Constraints:
# - Path is a string starting with '/' followed by alphanumeric characters separated by '/'
# - 2 <= path.length <= 100
# - -10^9 <= value <= 10^9

class TrieNode:
    def __init__(self):
        self.children = {}
        self.value = -1  # -1 means not a valid path endpoint
        self.is_path = False

class FileSystem:
    def __init__(self):
        self.root = TrieNode()

    def createPath(self, path: str, value: int) -> bool:
        # Split path into components
        components = path.split('/')[1:]  # Skip empty string from leading '/'

        if not components:
            return False

        node = self.root

        # Traverse to parent of the path to create
        for i in range(len(components) - 1):
            component = components[i]
            if component not in node.children:
                # Parent path doesn't exist
                return False
            node = node.children[component]

        # Create the final component
        final_component = components[-1]
        if final_component in node.children:
            # Path already exists
            return False

        # Create new path
        new_node = TrieNode()
        new_node.value = value
        new_node.is_path = True
        node.children[final_component] = new_node
        return True

    def get(self, path: str) -> int:
        components = path.split('/')[1:]

        if not components:
            return -1

        node = self.root

        # Traverse the path
        for component in components:
            if component not in node.children:
                return -1
            node = node.children[component]

        return node.value if node.is_path else -1


# Alternative: Using HashMap
class FileSystem2:
    def __init__(self):
        self.paths = {}  # path -> value

    def createPath(self, path: str, value: int) -> bool:
        # Check if path already exists
        if path in self.paths or path == "/":
            return False

        # Check if parent exists
        parent_path = path[:path.rfind('/')]
        if parent_path != "" and parent_path not in self.paths:
            return False

        # Create path
        self.paths[path] = value
        return True

    def get(self, path: str) -> int:
        return self.paths.get(path, -1)


# Alternative: Using nested dict for more flexibility
class FileSystem3:
    def __init__(self):
        self.root = {}

    def createPath(self, path: str, value: int) -> bool:
        components = [c for c in path.split('/') if c]

        if not components:
            return False

        current = self.root

        # Check parent exists
        for component in components[:-1]:
            if component not in current:
                return False
            current = current[component]

        # Create final component
        final = components[-1]
        if final in current:
            return False

        current[final] = {'__value__': value}
        return True

    def get(self, path: str) -> int:
        components = [c for c in path.split('/') if c]

        current = self.root
        for component in components:
            if component not in current:
                return -1
            current = current[component]

        return current.get('__value__', -1)

# Trie: Time O(k) for both operations where k is path length, Space O(n*k)
# HashMap: Time O(k) for both operations, Space O(n*k)
#
# Data Structure Comparison:
# 1. Trie: Better for prefix operations, listing directories
# 2. HashMap: Simpler, good for exact path lookup
# 3. Nested Dict: More flexible, easier to extend with file operations
#
# Key Operations:
# - createPath: Verify parent exists, create if not duplicate
# - get: Traverse path, return value if exists
#
# Extensions (follow-up):
# - ls(path): List all paths with given prefix
# - delete(path): Remove path if exists
# - move(src, dst): Move file/directory
# - mkdir(path): Create directory (without value)
#
# AirBnB: Very relevant to their file/folder organization systems
# Tests understanding of hierarchical data structures