import pandas as pd
import os

# Resolve every path relative to this script so it can be run from any
# working directory.
base_path = os.path.dirname(os.path.abspath(__file__))

# Input workbook, output workbook, and the CSV registry that persists the
# assigned IDs between runs.
input_xlsx_path = os.path.join(base_path, 'apparel_data_with_colors_and_sizes.xlsx')
output_xlsx_path = os.path.join(base_path, 'combined_color_and_size_data.xlsx')
id_csv_path = os.path.join(base_path, 'data', 'color_size_ids.csv')

# Fail early with a clear message if the input workbook is missing
if not os.path.exists(input_xlsx_path):
    raise FileNotFoundError(f"No such file or directory: '{input_xlsx_path}'")

# Read the existing Excel file
apparel_df = pd.read_excel(input_xlsx_path, sheet_name='Apparel Sweatshirts Data')

# Unique, non-empty colour codes in first-seen order
color_code_df = apparel_df[['Color_Code']].dropna().drop_duplicates().reset_index(drop=True)

# Collapse runs of whitespace in Sizes. The string conversion is applied to a
# copy and missing cells are masked back out afterwards; converting in place
# would turn NaN into the literal text 'nan', which dropna() cannot remove.
raw_sizes = apparel_df['Sizes']
cleaned_sizes = raw_sizes.astype(str).str.split().str.join(' ')
# Cells that held only whitespace collapse to '' and are treated as empty too.
apparel_df['Sizes'] = cleaned_sizes.where(raw_sizes.notna() & (cleaned_sizes != ''))

# Unique, non-empty size strings in first-seen order
sizes_df = apparel_df[['Sizes']].dropna().drop_duplicates().reset_index(drop=True)

# Load the ID registry from a previous run, or start an empty one
if os.path.exists(id_csv_path):
    # Read IDs as text: type inference would parse "00001" as the integer 1
    # and the zero padding would be lost on the next save.
    # NOTE(review): 'Value' is still type-inferred. If Color_Code is numeric in
    # the workbook, confirm that reloaded values still compare equal to it.
    id_df = pd.read_csv(id_csv_path, dtype={'ID': str})
else:
    # Create an empty DataFrame with columns Type, Value, and ID
    id_df = pd.DataFrame(columns=['Type', 'Value', 'ID'])

# Function to assign IDs and update the ID DataFrame
def assign_ids(df, id_df, column_name, type_name):
    """Map every value in ``df[column_name]`` to an ID from the registry.

    A value already registered in ``id_df`` under ``type_name`` reuses its
    stored ID (the first matching row wins). Any other value receives a new
    zero-padded, five-digit ID numbered from the current registry size, and
    a matching row is appended to the registry.

    Args:
        df: DataFrame holding the values to look up.
        id_df: Registry DataFrame with 'Type', 'Value' and 'ID' columns.
            It is not modified in place.
        column_name: Name of the column in ``df`` to read values from.
        type_name: Registry category the values belong to (matched against
            the 'Type' column).

    Returns:
        tuple: ``(ids, id_df)`` where ``ids`` is a list with one ID per row
        of ``df`` (in row order) and ``id_df`` is the registry including any
        newly created rows.
    """
    # Build the value -> ID lookup once instead of re-filtering the registry
    # for every single value.
    registered = id_df.loc[id_df['Type'] == type_name, ['Value', 'ID']]
    known_ids = {}
    for known_value, known_id in zip(registered['Value'], registered['ID']):
        # setdefault keeps the first occurrence if the registry has duplicates
        known_ids.setdefault(known_value, known_id)

    # New IDs continue from the number of rows already in the registry.
    next_number = len(id_df) + 1
    ids = []
    new_rows = []
    for value in df[column_name]:
        if value not in known_ids:
            new_id = f"{next_number:05d}"
            next_number += 1
            # Remember it so a repeated value in df reuses the same ID
            known_ids[value] = new_id
            new_rows.append({'Type': type_name, 'Value': value, 'ID': new_id})
        ids.append(known_ids[value])

    # Append all new rows in one concat rather than one concat per value.
    if new_rows:
        id_df = pd.concat([id_df, pd.DataFrame(new_rows)], ignore_index=True)
    return ids, id_df

# Assign IDs to Color_Code and Sizes; the registry is threaded through both
# calls so the size IDs continue after the colour IDs.
color_ids, id_df = assign_ids(color_code_df, id_df, 'Color_Code', 'Color')
size_ids, id_df = assign_ids(sizes_df, id_df, 'Sizes', 'Size')

# Add the assigned IDs to the respective DataFrames
color_code_df['ID'] = color_ids
sizes_df['ID'] = size_ids

# Save the updated ID DataFrame to CSV. The target directory is created
# first because to_csv does not create missing parent directories.
os.makedirs(os.path.dirname(id_csv_path), exist_ok=True)
id_df.to_csv(id_csv_path, index=False)

# Add the xromatologio and megethologio columns (one constant per list)
color_code_df['xromatologio'] = 3000001
sizes_df['megethologio'] = 3000000

# Lay the colour columns and the size columns side by side. Concatenating
# along axis=1 keeps every row of both lists; the shorter list is padded with
# empty cells. Assigning the size columns one by one into a frame indexed by
# the colours would silently drop any sizes beyond the number of colours.
color_columns = color_code_df[['Color_Code', 'xromatologio', 'ID']].rename(
    columns={'ID': 'Color_ID'}
)
size_columns = sizes_df[['Sizes', 'megethologio', 'ID']].rename(
    columns={'ID': 'Size_ID'}
)
combined_df = pd.concat([color_columns, size_columns], axis=1)

# Write the resulting data to a new Excel file
combined_df.to_excel(output_xlsx_path, sheet_name='Combined Data', index=False)

print(f"Combined color and size data saved in {output_xlsx_path}.")
