Visual Code(data=example):

Search Text in optgroup2file.txt

Enter search string:

Visual Code(data=input box):

Then view the example data:

Age,Female,Male 0~10,5,6 11~20,15,14 21~30,20,22 31~40,25,24 41~50,25,28 51~60,30,26 61~70,20,18 71~80,15,16 81~90,10,12

To extract countries:

Copy and paste the R code below into RStudio:

# ========================================
# Extract country mentions from abstracts
# ========================================
# Reads a CSV of abstracts, finds every known country name mentioned in each
# abstract, and writes one row per abstract (that mentions at least one
# country) with the countries spread across columns country_1..country_N.
#
# NOTE(review): the pasted version of this script had all statements collapsed
# onto the same physical lines as the `#` comments, which turns the code into
# a comment. It is restored here to one statement per line.

# ========================================
# Step 1: Install and load required packages
# ========================================
# install.packages(c("stringr", "dplyr", "readr", "tidyr"))
library(stringr)
library(dplyr)
library(readr)
library(tidyr) # separate(), pivot_longer(), replace_na()

# ========================================
# Step 2: Load your real data from CSV
# ========================================
input_path <- "F:/RR/abstractcountry.csv" # Adjust path as needed
output_path <- "F:/RR/countrylist.csv"
max_countries <- 10L # Number of country_<n> columns in the output

df <- read_csv(input_path)
stopifnot(ncol(df) >= 1L)

# Lowercase all column names; if no column is called `abstract`, treat the
# first column as the abstract text.
names(df) <- tolower(names(df))
if (!"abstract" %in% names(df)) {
  names(df)[1] <- "abstract"
}

# Add a row identifier.
df <- df %>% mutate(id = row_number())

# ========================================
# Step 3: Use a list of country names (you can expand this!)
# ========================================
countries <- c(
  "Albania", "Algeria", "Andorra", "Anguilla", "Argentina", "Armenia",
  "Australia", "Austria", "Azerbaijan", "Bahrain", "Bangladesh", "Belgium",
  "Belize", "Bhutan", "Bolivia", "Bosnia & Herceg", "Botswana", "Brasil",
  "Brazil", "Brunei", "Bulgaria", "Cameroon", "Canada", "Chile", "China",
  "Colombia", "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
  "Denmark", "Dominican Rep", "Ecuador", "Egypt", "El Salvador", "Estonia",
  "Ethiopia", "Fiji", "Finland", "France", "Georgia", "Germany", "Greece",
  "Guatemala", "Guinea", "Haiti", "Hong Kong", "Hungary", "Iceland", "India",
  "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Japan",
  "Jordan", "Kazakhstan", "Kosovo", "Kuwait", "Kyrgyzstan", "Latvia",
  "Lebanon", "Lithuania", "Luxembourg", "Macedonia", "Malawi", "Malaysia",
  "Malta", "Mexico", "Micronesia", "Mongolia", "Montenegro", "Morocco",
  "Myanmar", "Nepal", "Netherlands", "New Caledonia", "New Zealand",
  "Nicaragua", "Nigeria", "North Macedonia", "Norway", "Oman", "Pakistan",
  "Palestine", "Panama", "Papua Niugini", "Peru", "Philippines", "Pilipinas",
  "Poland", "Portugal", "Qatar", "Rep Congo",
  "Republique democratique du Congo", "Romania", "Russia", "San Marino",
  "Saudi Arabia", "Serbia", "Sierra Leone", "Singapore", "Slovakia",
  "Slovenia", "Solomon Islands", "South Africa", "South Korea", "Spain",
  "Sri Lanka", "Sudan", "Sweden", "Switzerland", "Syria", "Taiwan",
  "Tanzania", "Thailand", "Trinidad Tobago", "Tunisia", "Turkey", "Turkiye",
  "Turkmenistan", "U Arab Emirates", "U.K.", "U.S.", "Uganda", "Ukraine",
  # Full "United ..." names are listed so that the bare "United" fallback
  # below does not capture the first word of "United Kingdom" or
  # "United Arab Emirates" and later get rewritten to "United States".
  "United Arab Emirates", "United Kingdom", "United States", "United",
  "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela", "Vietnam", "Viti",
  "Zimbabwe"
)

# Longest names first: regex alternation takes the first alternative that
# matches, so "North Macedonia" must be tried before "Macedonia".
countries <- countries[order(nchar(countries), decreasing = TRUE)]

# ========================================
# Step 4: Define extraction function
# ========================================

# Build one regex that matches any of the given names as a whole word.
# Names are escaped so characters such as "." in "U.K." are matched literally,
# and look-arounds are used instead of \b because \b cannot match after a
# trailing "." that is followed by a space or punctuation.
build_country_pattern <- function(country_list) {
  escaped <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", country_list)
  str_c("(?<!\\w)(?:", str_c(escaped, collapse = "|"), ")(?!\\w)")
}

# Compiled once here rather than on every call to extract_countries().
country_pattern <- build_country_pattern(countries)

# Return the distinct country names found in `text`.
#
# text:    character vector (typically one abstract); NA is allowed.
# pattern: regex produced by build_country_pattern(); defaults to the pattern
#          built from `countries`.
# Returns a character vector of unique matches in order of first appearance,
# or character(0) when nothing matches or `text` is NA.
extract_countries <- function(text, pattern = country_pattern) {
  found <- unlist(str_extract_all(text, pattern), use.names = FALSE)
  # str_extract_all() yields NA for NA input; drop it so it is not later
  # pasted into the literal string "NA".
  found <- found[!is.na(found)]
  unique(as.character(found))
}

# ========================================
# Step 5: Apply function and store as list-column
# ========================================
df$country_mentions <- lapply(df$abstract, extract_countries)

# ========================================
# Step 6: View results
# ========================================
print(head(df))

# Collapse each list of matches to a comma-separated string (NA if none).
df <- df %>%
  mutate(
    mentions = vapply(
      country_mentions,
      function(found) {
        if (length(found) == 0) {
          NA_character_
        } else {
          paste(found, collapse = ",")
        }
      },
      character(1)
    )
  )

# Keep only abstracts that mention at least one country.
df_clean <- df %>%
  filter(!is.na(mentions), mentions != "")

# separate(extra = "drop") discards overflow silently, so say so up front.
if (any(lengths(df_clean$country_mentions) > max_countries)) {
  warning(
    "Some abstracts mention more than ", max_countries,
    " countries; the extra mentions are dropped.",
    call. = FALSE
  )
}

country_cols <- paste0("country_", seq_len(max_countries))

# Split the comma-separated countries into country_1..country_N columns.
# Only whitespace around the separators is removed; spaces inside a name
# ("Costa Rica", "Saudi Arabia") are kept.
output_df_wide <- df_clean %>%
  mutate(mentions = str_replace_all(mentions, "\\s*,\\s*", ",")) %>%
  separate(
    mentions,
    into = country_cols,
    sep = ",",
    fill = "right",
    extra = "drop"
  )

# Only the new country_<n> columns. all_of() is used instead of
# starts_with("country_"), which would also pick up the `country_mentions`
# list-column.
output_df <- output_df_wide %>%
  select(all_of(country_cols))

# Long form: one row per distinct (column, country) pair.
output_df_long <- output_df %>%
  pivot_longer(
    cols = everything(),
    names_to = "col",
    values_to = "country"
  ) %>%
  filter(!is.na(country), country != "") %>%
  mutate(country = str_trim(country)) %>%
  filter(country != "") %>%
  distinct()

# Final table: drop the helper columns, blank out missing values in the text
# columns, and expand the bare "United" fallback to "United States".
output_df_cleaned <- output_df_wide %>%
  select(-abstract, -id, -country_mentions) %>%
  mutate(across(where(is.character), ~ replace_na(.x, ""))) %>%
  mutate(
    across(
      all_of(country_cols),
      ~ str_replace_all(.x, "^United$", "United States")
    )
  )

# Optional: view cleaned data
print(head(output_df_cleaned, 10))

write.csv(output_df_cleaned, output_path, row.names = FALSE)

# Optional message
cat("✅ Results written to: ", output_path, "\n", sep = "")