@@ -11,7 +11,6 @@ defmodule DiffWeb.LiveView do
1111            DiffWeb.TooLargeComponent . render ( % { file:  file_path } ) 
1212            |>  Phoenix.HTML.Safe . to_iodata ( ) 
1313            |>  IO . iodata_to_binary ( ) 
14-             |>  sanitize_utf8 ( ) 
1514
1615          { :ok ,  % { "diff"  =>  raw_diff ,  "path_from"  =>  path_from ,  "path_to"  =>  path_to } }  -> 
1716            case  GitDiff . parse_patch ( raw_diff ,  relative_from:  path_from ,  relative_to:  path_to )  do 
@@ -38,42 +37,16 @@ defmodule DiffWeb.LiveView do
3837    end 
3938  end 
4039
41-   defp  sanitize_utf8 ( content )  when  is_binary ( content )  do 
42-     case  String . valid? ( content )  do 
43-       true  -> 
44-         content 
45- 
46-       false  -> 
47-         # Multiple fallback strategies for invalid UTF-8 
48-         sanitize_invalid_bytes ( content ) 
49-     end 
50-   end 
51- 
52-   defp  sanitize_utf8 ( content ) ,  do:  content 
53- 
54-   defp  sanitize_invalid_bytes ( content )  do 
55-     # Try different encoding conversions and fallbacks 
56-     cond  do 
57-       # Try converting from Latin-1/ISO-8859-1 encoding 
58-       latin1_result  =  safe_unicode_convert ( content ,  :latin1 ,  :utf8 )  -> 
59-         latin1_result 
60- 
61-       # Last resort: replace invalid bytes with replacement character 
62-       true  -> 
63-         content 
64-         |>  :binary . bin_to_list ( ) 
65-         # Replace high bytes with '?' 
66-         |>  Enum . map ( fn  byte  ->  if  byte  >  127 ,  do:  63 ,  else:  byte  end ) 
67-         |>  :binary . list_to_bin ( ) 
68-     end 
69-   end 
70- 
71-   defp  safe_unicode_convert ( content ,  from ,  to )  do 
72-     case  :unicode . characters_to_binary ( content ,  from ,  to )  do 
73-       result  when  is_binary ( result )  ->  result 
74-       _  ->  nil 
75-     end 
76-   rescue 
77-     _  ->  nil 
@doc """
Replaces every byte of `content` that is not part of valid UTF-8 with `"?"`.

The binary is split into alternating runs of valid and invalid data.
Valid runs are returned unchanged; each invalid run is replaced by as many
`"?"` characters as it has bytes, so the result always has the same
`byte_size/1` as the input and is guaranteed to be a valid UTF-8 string.

Only binaries are accepted; any other term raises `FunctionClauseError`.
"""
@spec sanitize_utf8(binary()) :: String.t()
def sanitize_utf8(content) when is_binary(content) do
  content
  # Yields alternating runs: ["valid", <<invalid bytes>>, "valid", ...].
  |> String.chunk(:valid)
  # Map and join in a single pass instead of building an intermediate list.
  |> Enum.map_join("", fn chunk ->
    if String.valid?(chunk) do
      chunk
    else
      # One "?" per offending byte keeps byte offsets stable.
      String.duplicate("?", byte_size(chunk))
    end
  end)
end
7952end 
0 commit comments