remove malicious script tags from file

Here's a small Windows Forms application that I created to automate the removal of malicious SCRIPT tags inserted into web files (or, more generally, any unwanted scripts – malicious or not).

Of course, you can always do this manually, but if we're talking about hundreds or thousands of files, it would be one heck of a job.

The idea is to:

1) retrieve list of all script tags in all files in a given folder (including subfolders)

2) list scripts found

3) select the scripts to remove – if a script contains a line break, select it and then click the [View Script Detail] button to view it in full. Also note that the checkedListBox is not set to check on click, so selecting an item does not check it

4) set a folder to save the "cleaned" file

5) then process (the selected scripts are removed and the cleaned files are saved in the Target Folder – retaining their folder hierarchy)

That's it

Here's a glimpse at the "core" code for the application. Note that, for simplicity, I employed recursion instead of the faster, better-performing stack-based approach.

The complete source code can be downloaded below. Along with the output (executable).

** Search a root folder (and its subfolders and files) for script tags (and their contents, of course)

   70 // recursive

   71         private void SearchFolder(string newRootFolder)

   72         {

   73             DirectoryInfo rootDir = new DirectoryInfo(newRootFolder);

   74             foreach (FileInfo fi in rootDir.GetFiles())

   75             {

   76                 SearchFile(fi);

   77             }

   78 

   79             foreach (DirectoryInfo di in rootDir.GetDirectories())

   80             {

   81                 SearchFolder(di.FullName);

   82             }

   83         }

   84 

   85         private void SearchFile(FileInfo fi)

   86         {

   87             using (StreamReader sr = new StreamReader(fi.FullName))

   88             {

   89                 string fileContent = sr.ReadToEnd();

   90                 MatchCollection ms =

   91                     Regex.Matches(

   92                         fileContent,

   93                         @"<script([^>]*)>.*?</script>",

   94                         RegexOptions.Singleline); // handle line breaks inside script tags

   95 

   96                 foreach (Match m in ms)

   97                 {

   98                     if (checkedListBox1.Items.Contains(m.Value))

   99                         continue;

  100 

  101                     checkedListBox1.Items.Add(m.Value);

  102                 }

  103             }

  104         }

** Process a root folder (and its subfolders and files): wherever a script marked for removal is found, replace it with an empty string (effectively removing it), then save the file in the Target Folder.

  105 

  106         // recursive

  107         private void ProcessFolder(string newRootFolder)

  108         {

  109             DirectoryInfo rootDir = new DirectoryInfo(newRootFolder);

  110             foreach (FileInfo fi in rootDir.GetFiles())

  111             {

  112                 ProcessFile(fi);

  113             }

  114 

  115             foreach (DirectoryInfo di in rootDir.GetDirectories())

  116             {

  117                 ProcessFolder(di.FullName);

  118             }

  119         }

  120 

  121         private void ProcessFile(FileInfo fi)

  122         {

  123             string path = fi.FullName;

  124             using (StreamReader sr = new StreamReader(path))

  125             {

  126                 string fileContent = sr.ReadToEnd();

  127                 StringBuilder sb = new StringBuilder(fileContent);

  128                 int origLength = sb.Length;

  129                 foreach (string stringToRemove in selectedScripts)

  130                 {

  131                     sb.Replace(stringToRemove, String.Empty);

  132                 }

  133 

  134                 if (sb.Length != origLength)

  135                 {

  136                     string newFilePath = path.Replace(textBox1.Text, textBox2.Text);

  137                     string newFileDirectory = Path.GetDirectoryName(newFilePath);

  138                     if (!Directory.Exists(newFileDirectory))

  139                     {

  140                         Directory.CreateDirectory(newFileDirectory);

  141                     }

  142 

  143                     string newFileContent = sb.ToString();

  144                     using (StreamWriter sw = File.CreateText(newFilePath))

  145                     {

  146                         sw.Write(newFileContent);

  147                     }

  148                 }

  149             }

  150         }

Files for Download:

ScriptRemover_Executable.zip (11.11 kb)

 

ScriptRemover_Source.zip (10.57 kb)

Hope this helps in one way or another and as usual, feel free to make comments/corrections. This has been haphazardly made but tried my best to make it useful and working.

 

*** Note that this has some known limitations (due to the regular expression used):

1) script tags that contain extra spaces, like <script>abc</script > (note that the end script tag has a space before >)

2) self-closing script tags, like <script src="url" />

I had no need to handle these cases myself; however, should you need to handle them, feel free to drop me a message and I'll try to help out.

By the way, Happy 2009 everyone!


Posted

in

,

by

Tags: